2012-07-27 21:02:47

by J. Bruce Fields

[permalink] [raw]
Subject: [PATCH] rpc.gssd: don't call poll() twice a second

From: "J. Bruce Fields" <[email protected]>

Use the self-pipe trick instead.

(Alternatively, we could use ppoll. That wasn't supported before
2.6.16, whereas gss was introduced in 2.5. I was trying to be
conservative about compatibility with older kernels, but maybe we don't
care at this point.)

Signed-off-by: J. Bruce Fields <[email protected]>
---
utils/gssd/gssd_main_loop.c | 52 +++++++++++++++++++++++++------------------
1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
index c18e12c..8886eb6 100644
--- a/utils/gssd/gssd_main_loop.c
+++ b/utils/gssd/gssd_main_loop.c
@@ -57,15 +57,18 @@
extern struct pollfd *pollarray;
extern int pollsize;

-#define POLL_MILLISECS 500
+static int pipefd[2];

-static volatile int dir_changed = 1;
+static void something_changed(void)
+{
+ if (1 != write(pipefd[1], "!", 1))
+ printerr(2, "weird; maybe an interrupt?");
+}

static void dir_notify_handler(int sig, siginfo_t *si, void *data)
{
printerr(2, "dir_notify_handler: sig %d si %p data %p\n", sig, si, data);
-
- dir_changed = 1;
+ something_changed();
}

static void
@@ -80,7 +83,7 @@ scan_poll_results(int ret)
if (i >= 0 && pollarray[i].revents) {
if (pollarray[i].revents & POLLHUP) {
clp->gssd_close_me = 1;
- dir_changed = 1;
+ something_changed();
}
if (pollarray[i].revents & POLLIN)
handle_gssd_upcall(clp);
@@ -93,7 +96,7 @@ scan_poll_results(int ret)
if (i >= 0 && pollarray[i].revents) {
if (pollarray[i].revents & POLLHUP) {
clp->krb5_close_me = 1;
- dir_changed = 1;
+ something_changed();
}
if (pollarray[i].revents & POLLIN)
handle_krb5_upcall(clp);
@@ -123,11 +126,13 @@ topdirs_add_entry(struct dirent *dent)
}
snprintf(tdi->dirname, PATH_MAX, "%s/%s", pipefs_dir, dent->d_name);
tdi->fd = open(tdi->dirname, O_RDONLY);
- if (tdi->fd != -1) {
- fcntl(tdi->fd, F_SETSIG, DNOTIFY_SIGNAL);
- fcntl(tdi->fd, F_NOTIFY,
- DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT);
+ if (tdi->fd == -1) {
+ printerr(0, "ERROR: failed to open %s\n", tdi->dirname);
+ free(tdi);
+ return -1;
}
+ fcntl(tdi->fd, F_SETSIG, DNOTIFY_SIGNAL);
+ fcntl(tdi->fd, F_NOTIFY, DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT);

TAILQ_INSERT_HEAD(&topdirs_list, tdi, list);
return 0;
@@ -185,6 +190,7 @@ gssd_run()
int ret;
struct sigaction dn_act;
sigset_t set;
+ char buf;

/* Taken from linux/Documentation/dnotify.txt: */
dn_act.sa_sigaction = dir_notify_handler;
@@ -202,26 +208,28 @@ gssd_run()

init_client_list();

+ ret = pipe2(pipefd, O_NONBLOCK);
+ if (ret == -1)
+ return;
+ pollarray[0].fd = pipefd[0];
+ pollarray[0].events = POLLIN;
+
printerr(1, "beginning poll\n");
while (1) {
- while (dir_changed) {
- dir_changed = 0;
- if (update_client_list()) {
- /* Error msg is already printed */
- exit(1);
- }
- }
- /* race condition here: dir_changed could be set before we
- * enter the poll, and we'd never notice if it weren't for the
- * timeout. */
- ret = poll(pollarray, pollsize, POLL_MILLISECS);
+ ret = poll(pollarray, pollsize, -1);
if (ret < 0) {
if (errno != EINTR)
printerr(0,
"WARNING: error return from poll\n");
} else if (ret == 0) {
- /* timeout */
+ /* timeout?? */
} else { /* ret > 0 */
+ if (pollarray[0].revents) {
+ if (1 != read(pipefd[0], &buf, 1))
+ printerr(2, "weird; maybe an interrupt?");
+ if (update_client_list())
+ exit(1);
+ }
scan_poll_results(ret);
}
}
--
1.7.9.5



2012-07-31 18:24:55

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] rpc.gssd: don't call poll() twice a second



On 07/30/2012 06:10 PM, J. Bruce Fields wrote:
> On Mon, Jul 30, 2012 at 04:59:49PM -0400, J. Bruce Fields wrote:
>> On Sun, Jul 29, 2012 at 07:09:30PM -0400, Steve Dickson wrote:
>>>
>>>
>>> On 07/29/2012 01:48 PM, J. Bruce Fields wrote:
>>>> On Fri, Jul 27, 2012 at 07:08:53PM -0400, Jim Rees wrote:
>>>>> J. Bruce Fields wrote:
>>>>>
>>>>> + if (1 != write(pipefd[1], "!", 1))
>>>>> + printerr(2, "weird; maybe an interrupt?");
>>>>>
>>>>> Use Yoda conditions must we?
>>>>
>>>> Yeah, yeah. How about:
>>>>
>>>> static void something_changed(void)
>>>> {
>>>> - if (1 != write(pipefd[1], "!", 1))
>>>> - printerr(2, "weird; maybe an interrupt?");
>>>> + if (write(pipefd[1], "!", 1) != 1)
>>>> + printerr(0, "%s writing to pipe", strerror(errno));
>>>> }
>>>>
>>>> ?
>>> Better... IMHO.. but what's going to mean when we see that in some log?
>>
>> Beats me.
>
> Looking at it a little more: actually, if gssd is slow to process these
> events then in theory they could pile up, and we could eventually get
> EAGAIN/EWOULDBLOCK.
>
> Which wouldn't be a problem, except that now we're modifying errno in a
> signal handler. So the signal handler should be saving and restoring
> errno.
>
> And also: I noticed one of the reasons gssd hasn't been completely
> reliable for me is that we already have a printerr() in the signal
> handler, and printerr() doesn't appear to be reentrant.
>
> Eh, I'm leaning toward just using ppoll. According to the man page that
> requires kernel >= 2.6.16, glibc >= 2.4. Is that OK?
I would think so....

steved.


2012-07-27 23:09:00

by Jim Rees

[permalink] [raw]
Subject: Re: [PATCH] rpc.gssd: don't call poll() twice a second

J. Bruce Fields wrote:

+ if (1 != write(pipefd[1], "!", 1))
+ printerr(2, "weird; maybe an interrupt?");

Use Yoda conditions must we?

2012-07-31 21:22:21

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 2/4] rpc.gssd: don't call printerr from signal handler


On Jul 31, 2012, at 2:00 PM, J. Bruce Fields wrote:

> From: "J. Bruce Fields" <[email protected]>
>
> printerr() isn't actually safe to call from a signal handler. It might
> be possible to make it so, but I think this is the only case in
> nfs-utils where we try to, and I'm not convinced it's worth it.
>
> This fixes a bug that would eventually cause mounts to hang when gssd
> is run with -vv.

Yes, I've seen this hang. gssd gets stuck on a futex() with -vv or higher.

> Signed-off-by: J. Bruce Fields <[email protected]>
> ---
> utils/gssd/gssd_main_loop.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
> index 9954ffb..6914687 100644
> --- a/utils/gssd/gssd_main_loop.c
> +++ b/utils/gssd/gssd_main_loop.c
> @@ -61,10 +61,8 @@ extern int pollsize;
>
> static volatile int dir_changed = 1;
>
> -static void dir_notify_handler(int sig)
> +static void dir_notify_handler(__attribute__((unused))int sig)
> {
> - printerr(2, "dir_notify_handler: sig %d\n", sig);
> -
> dir_changed = 1;
> }
>
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




2012-07-31 20:59:33

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH] rpc.gssd: don't call poll() twice a second

On Tue, Jul 31, 2012 at 12:50:42PM -0400, Steve Dickson wrote:
> On 07/30/2012 06:10 PM, J. Bruce Fields wrote:
> > Eh, I'm leaning toward just using ppoll. According to the man page that
> > requires kernel >= 2.6.16, glibc >= 2.4. Is that OK?
> I would think so....

OK. With ppoll, and split up into little pieces this time.

--b.

2012-07-31 21:00:53

by J. Bruce Fields

[permalink] [raw]
Subject: [PATCH 1/4] rpc.gssd: simplify signal handling

From: "J. Bruce Fields" <[email protected]>

We're not actually using the extra sa_sigaction parameters.

Signed-off-by: J. Bruce Fields <[email protected]>
---
utils/gssd/gssd_main_loop.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
index c18e12c..9954ffb 100644
--- a/utils/gssd/gssd_main_loop.c
+++ b/utils/gssd/gssd_main_loop.c
@@ -61,9 +61,9 @@ extern int pollsize;

static volatile int dir_changed = 1;

-static void dir_notify_handler(int sig, siginfo_t *si, void *data)
+static void dir_notify_handler(int sig)
{
- printerr(2, "dir_notify_handler: sig %d si %p data %p\n", sig, si, data);
+ printerr(2, "dir_notify_handler: sig %d\n", sig);

dir_changed = 1;
}
@@ -183,13 +183,12 @@ void
gssd_run()
{
int ret;
- struct sigaction dn_act;
+ struct sigaction dn_act = {
+ .sa_handler = dir_notify_handler
+ };
sigset_t set;

- /* Taken from linux/Documentation/dnotify.txt: */
- dn_act.sa_sigaction = dir_notify_handler;
sigemptyset(&dn_act.sa_mask);
- dn_act.sa_flags = SA_SIGINFO;
sigaction(DNOTIFY_SIGNAL, &dn_act, NULL);

/* just in case the signal is blocked... */
--
1.7.9.5


2012-07-31 22:30:00

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 4/4] rpc.gssd: don't call poll() twice a second.


On Jul 31, 2012, at 2:27 PM, J. Bruce Fields wrote:

> On Tue, Jul 31, 2012 at 02:23:36PM -0700, Chuck Lever wrote:
>>
>> On Jul 31, 2012, at 2:00 PM, J. Bruce Fields wrote:
>>
>>> From: "J. Bruce Fields" <[email protected]>
>>>
>>> Use ppoll instead.
>>>
>>> (This breaks compatibility with kernels before 2.6.16 and glibc before
>>> 2.4).
>>
>> I think I prefer seeing this wrapped with some autoconf machinery to allow nfs-utils to continue to build on earlier systems.
>
> I was assuming we could drop support for older systems.

We went to a lot of trouble to get the IPv6 stuff to build correctly on older systems, and we still heard complaints.

> If not, then I'd much prefer to go back to the self-pipe trick than to
> deal with autoconf and ifdef'ing out all this stuff.

This patch is a tiny change. I don't think autoconf in this case is onerous. I'm happy to help you with it.

> --b.
>
>>
>>> Signed-off-by: J. Bruce Fields <[email protected]>
>>> ---
>>> utils/gssd/gssd_main_loop.c | 12 +++++-------
>>> 1 file changed, 5 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
>>> index 142c8c5..2d20fa2 100644
>>> --- a/utils/gssd/gssd_main_loop.c
>>> +++ b/utils/gssd/gssd_main_loop.c
>>> @@ -186,13 +186,14 @@ gssd_run()
>>> struct sigaction dn_act = {
>>> .sa_handler = dir_notify_handler
>>> };
>>> - sigset_t set;
>>> + sigset_t set, emptyset;
>>>
>>> sigemptyset(&dn_act.sa_mask);
>>> sigaction(DNOTIFY_SIGNAL, &dn_act, NULL);
>>>
>>> - /* just in case the signal is blocked... */
>>> + sigemptyset(&emptyset);
>>> sigemptyset(&set);
>>> + /* just in case the signal is blocked... */
>>> sigaddset(&set, DNOTIFY_SIGNAL);
>>> sigprocmask(SIG_UNBLOCK, &set, NULL);
>>>
>>> @@ -210,16 +211,13 @@ gssd_run()
>>> exit(1);
>>> }
>>> }
>>> - /* race condition here: dir_changed could be set before we
>>> - * enter the poll, and we'd never notice if it weren't for the
>>> - * timeout. */
>>> - ret = poll(pollarray, pollsize, POLL_MILLISECS);
>>> + ret = ppoll(pollarray, pollsize, NULL, &emptyset);
>>> if (ret < 0) {
>>> if (errno != EINTR)
>>> printerr(0,
>>> "WARNING: error return from poll\n");
>>> } else if (ret == 0) {
>>> - /* timeout */
>>> + printerr(0, "WARNING: unexpected timeout\n");
>>> } else { /* ret > 0 */
>>> scan_poll_results(ret);
>>> }
>>> --
>>> 1.7.9.5
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to [email protected]
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>
>> --
>> Chuck Lever
>> chuck[dot]lever[at]oracle[dot]com
>>
>>
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




2012-07-31 21:00:53

by J. Bruce Fields

[permalink] [raw]
Subject: [PATCH 4/4] rpc.gssd: don't call poll() twice a second.

From: "J. Bruce Fields" <[email protected]>

Use ppoll instead.

(This breaks compatibility with kernels before 2.6.16 and glibc before
2.4).

Signed-off-by: J. Bruce Fields <[email protected]>
---
utils/gssd/gssd_main_loop.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
index 142c8c5..2d20fa2 100644
--- a/utils/gssd/gssd_main_loop.c
+++ b/utils/gssd/gssd_main_loop.c
@@ -186,13 +186,14 @@ gssd_run()
struct sigaction dn_act = {
.sa_handler = dir_notify_handler
};
- sigset_t set;
+ sigset_t set, emptyset;

sigemptyset(&dn_act.sa_mask);
sigaction(DNOTIFY_SIGNAL, &dn_act, NULL);

- /* just in case the signal is blocked... */
+ sigemptyset(&emptyset);
sigemptyset(&set);
+ /* just in case the signal is blocked... */
sigaddset(&set, DNOTIFY_SIGNAL);
sigprocmask(SIG_UNBLOCK, &set, NULL);

@@ -210,16 +211,13 @@ gssd_run()
exit(1);
}
}
- /* race condition here: dir_changed could be set before we
- * enter the poll, and we'd never notice if it weren't for the
- * timeout. */
- ret = poll(pollarray, pollsize, POLL_MILLISECS);
+ ret = ppoll(pollarray, pollsize, NULL, &emptyset);
if (ret < 0) {
if (errno != EINTR)
printerr(0,
"WARNING: error return from poll\n");
} else if (ret == 0) {
- /* timeout */
+ printerr(0, "WARNING: unexpected timeout\n");
} else { /* ret > 0 */
scan_poll_results(ret);
}
--
1.7.9.5


2012-07-31 22:20:53

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 4/4] rpc.gssd: don't call poll() twice a second.

On Tue, Jul 31, 2012 at 02:23:36PM -0700, Chuck Lever wrote:
>
> On Jul 31, 2012, at 2:00 PM, J. Bruce Fields wrote:
>
> > From: "J. Bruce Fields" <[email protected]>
> >
> > Use ppoll instead.
> >
> > (This breaks compatibility with kernels before 2.6.16 and glibc before
> > 2.4).
>
> I think I prefer seeing this wrapped with some autoconf machinery to allow nfs-utils to continue to build on earlier systems.

I was assuming we could drop support for older systems.

If not, then I'd much prefer to go back to the self-pipe trick than to
deal with autoconf and ifdef'ing out all this stuff.

--b.

>
> > Signed-off-by: J. Bruce Fields <[email protected]>
> > ---
> > utils/gssd/gssd_main_loop.c | 12 +++++-------
> > 1 file changed, 5 insertions(+), 7 deletions(-)
> >
> > diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
> > index 142c8c5..2d20fa2 100644
> > --- a/utils/gssd/gssd_main_loop.c
> > +++ b/utils/gssd/gssd_main_loop.c
> > @@ -186,13 +186,14 @@ gssd_run()
> > struct sigaction dn_act = {
> > .sa_handler = dir_notify_handler
> > };
> > - sigset_t set;
> > + sigset_t set, emptyset;
> >
> > sigemptyset(&dn_act.sa_mask);
> > sigaction(DNOTIFY_SIGNAL, &dn_act, NULL);
> >
> > - /* just in case the signal is blocked... */
> > + sigemptyset(&emptyset);
> > sigemptyset(&set);
> > + /* just in case the signal is blocked... */
> > sigaddset(&set, DNOTIFY_SIGNAL);
> > sigprocmask(SIG_UNBLOCK, &set, NULL);
> >
> > @@ -210,16 +211,13 @@ gssd_run()
> > exit(1);
> > }
> > }
> > - /* race condition here: dir_changed could be set before we
> > - * enter the poll, and we'd never notice if it weren't for the
> > - * timeout. */
> > - ret = poll(pollarray, pollsize, POLL_MILLISECS);
> > + ret = ppoll(pollarray, pollsize, NULL, &emptyset);
> > if (ret < 0) {
> > if (errno != EINTR)
> > printerr(0,
> > "WARNING: error return from poll\n");
> > } else if (ret == 0) {
> > - /* timeout */
> > + printerr(0, "WARNING: unexpected timeout\n");
> > } else { /* ret > 0 */
> > scan_poll_results(ret);
> > }
> > --
> > 1.7.9.5
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
>
>
>

2012-07-29 17:48:41

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH] rpc.gssd: don't call poll() twice a second

On Fri, Jul 27, 2012 at 07:08:53PM -0400, Jim Rees wrote:
> J. Bruce Fields wrote:
>
> + if (1 != write(pipefd[1], "!", 1))
> + printerr(2, "weird; maybe an interrupt?");
>
> Use Yoda conditions must we?

Yeah, yeah. How about:

static void something_changed(void)
{
- if (1 != write(pipefd[1], "!", 1))
- printerr(2, "weird; maybe an interrupt?");
+ if (write(pipefd[1], "!", 1) != 1)
+ printerr(0, "%s writing to pipe", strerror(errno));
}

?

--b.

2012-07-31 21:00:53

by J. Bruce Fields

[permalink] [raw]
Subject: [PATCH 3/4] rpc.gssd: handle error to open toplevel directory

From: "J. Bruce Fields" <[email protected]>

Reverse the sense of the test here, and also add debugging and cleanup
in the error case.

(Though the lack of cleanup isn't currently a problem in practice since
we'll eventually exit in this case.)

Signed-off-by: J. Bruce Fields <[email protected]>
---
utils/gssd/gssd_main_loop.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
index 6914687..142c8c5 100644
--- a/utils/gssd/gssd_main_loop.c
+++ b/utils/gssd/gssd_main_loop.c
@@ -121,11 +121,13 @@ topdirs_add_entry(struct dirent *dent)
}
snprintf(tdi->dirname, PATH_MAX, "%s/%s", pipefs_dir, dent->d_name);
tdi->fd = open(tdi->dirname, O_RDONLY);
- if (tdi->fd != -1) {
- fcntl(tdi->fd, F_SETSIG, DNOTIFY_SIGNAL);
- fcntl(tdi->fd, F_NOTIFY,
- DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT);
+ if (tdi->fd == -1) {
+ printerr(0, "ERROR: failed to open %s\n", tdi->dirname);
+ free(tdi);
+ return -1;
}
+ fcntl(tdi->fd, F_SETSIG, DNOTIFY_SIGNAL);
+ fcntl(tdi->fd, F_NOTIFY, DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT);

TAILQ_INSERT_HEAD(&topdirs_list, tdi, list);
return 0;
--
1.7.9.5


2012-07-29 23:09:41

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] rpc.gssd: don't call poll() twice a second



On 07/29/2012 01:48 PM, J. Bruce Fields wrote:
> On Fri, Jul 27, 2012 at 07:08:53PM -0400, Jim Rees wrote:
>> J. Bruce Fields wrote:
>>
>> + if (1 != write(pipefd[1], "!", 1))
>> + printerr(2, "weird; maybe an interrupt?");
>>
>> Use Yoda conditions must we?
>
> Yeah, yeah. How about:
>
> static void something_changed(void)
> {
> - if (1 != write(pipefd[1], "!", 1))
> - printerr(2, "weird; maybe an interrupt?");
> + if (write(pipefd[1], "!", 1) != 1)
> + printerr(0, "%s writing to pipe", strerror(errno));
> }
>
> ?
Better... IMHO.. but what's that going to mean when we see that in some log?

steved

2012-07-30 22:10:43

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH] rpc.gssd: don't call poll() twice a second

On Mon, Jul 30, 2012 at 04:59:49PM -0400, J. Bruce Fields wrote:
> On Sun, Jul 29, 2012 at 07:09:30PM -0400, Steve Dickson wrote:
> >
> >
> > On 07/29/2012 01:48 PM, J. Bruce Fields wrote:
> > > On Fri, Jul 27, 2012 at 07:08:53PM -0400, Jim Rees wrote:
> > >> J. Bruce Fields wrote:
> > >>
> > >> + if (1 != write(pipefd[1], "!", 1))
> > >> + printerr(2, "weird; maybe an interrupt?");
> > >>
> > >> Use Yoda conditions must we?
> > >
> > > Yeah, yeah. How about:
> > >
> > > static void something_changed(void)
> > > {
> > > - if (1 != write(pipefd[1], "!", 1))
> > > - printerr(2, "weird; maybe an interrupt?");
> > > + if (write(pipefd[1], "!", 1) != 1)
> > > + printerr(0, "%s writing to pipe", strerror(errno));
> > > }
> > >
> > > ?
> > Better... IMHO.. but what's going to mean when we see that in some log?
>
> Beats me.

Looking at it a little more: actually, if gssd is slow to process these
events then in theory they could pile up, and we could eventually get
EAGAIN/EWOULDBLOCK.

Which wouldn't be a problem, except that now we're modifying errno in a
signal handler. So the signal handler should be saving and restoring
errno.

And also: I noticed one of the reasons gssd hasn't been completely
reliable for me is that we already have a printerr() in the signal
handler, and printerr() doesn't appear to be reentrant.

Eh, I'm leaning toward just using ppoll. According to the man page that
requires kernel >= 2.6.16, glibc >= 2.4. Is that OK?

--b.

2012-07-31 21:23:42

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 4/4] rpc.gssd: don't call poll() twice a second.


On Jul 31, 2012, at 2:00 PM, J. Bruce Fields wrote:

> From: "J. Bruce Fields" <[email protected]>
>
> Use ppoll instead.
>
> (This breaks compatibility with kernels before 2.6.16 and glibc before
> 2.4).

I think I prefer seeing this wrapped with some autoconf machinery to allow nfs-utils to continue to build on earlier systems.

> Signed-off-by: J. Bruce Fields <[email protected]>
> ---
> utils/gssd/gssd_main_loop.c | 12 +++++-------
> 1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
> index 142c8c5..2d20fa2 100644
> --- a/utils/gssd/gssd_main_loop.c
> +++ b/utils/gssd/gssd_main_loop.c
> @@ -186,13 +186,14 @@ gssd_run()
> struct sigaction dn_act = {
> .sa_handler = dir_notify_handler
> };
> - sigset_t set;
> + sigset_t set, emptyset;
>
> sigemptyset(&dn_act.sa_mask);
> sigaction(DNOTIFY_SIGNAL, &dn_act, NULL);
>
> - /* just in case the signal is blocked... */
> + sigemptyset(&emptyset);
> sigemptyset(&set);
> + /* just in case the signal is blocked... */
> sigaddset(&set, DNOTIFY_SIGNAL);
> sigprocmask(SIG_UNBLOCK, &set, NULL);
>
> @@ -210,16 +211,13 @@ gssd_run()
> exit(1);
> }
> }
> - /* race condition here: dir_changed could be set before we
> - * enter the poll, and we'd never notice if it weren't for the
> - * timeout. */
> - ret = poll(pollarray, pollsize, POLL_MILLISECS);
> + ret = ppoll(pollarray, pollsize, NULL, &emptyset);
> if (ret < 0) {
> if (errno != EINTR)
> printerr(0,
> "WARNING: error return from poll\n");
> } else if (ret == 0) {
> - /* timeout */
> + printerr(0, "WARNING: unexpected timeout\n");
> } else { /* ret > 0 */
> scan_poll_results(ret);
> }
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com




2012-07-31 21:00:53

by J. Bruce Fields

[permalink] [raw]
Subject: [PATCH 2/4] rpc.gssd: don't call printerr from signal handler

From: "J. Bruce Fields" <[email protected]>

printerr() isn't actually safe to call from a signal handler. It might
be possible to make it so, but I think this is the only case in
nfs-utils where we try to, and I'm not convinced it's worth it.

This fixes a bug that would eventually cause mounts to hang when gssd
is run with -vv.

Signed-off-by: J. Bruce Fields <[email protected]>
---
utils/gssd/gssd_main_loop.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
index 9954ffb..6914687 100644
--- a/utils/gssd/gssd_main_loop.c
+++ b/utils/gssd/gssd_main_loop.c
@@ -61,10 +61,8 @@ extern int pollsize;

static volatile int dir_changed = 1;

-static void dir_notify_handler(int sig)
+static void dir_notify_handler(__attribute__((unused))int sig)
{
- printerr(2, "dir_notify_handler: sig %d\n", sig);
-
dir_changed = 1;
}

--
1.7.9.5


2012-07-30 20:59:52

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH] rpc.gssd: don't call poll() twice a second

On Sun, Jul 29, 2012 at 07:09:30PM -0400, Steve Dickson wrote:
>
>
> On 07/29/2012 01:48 PM, J. Bruce Fields wrote:
> > On Fri, Jul 27, 2012 at 07:08:53PM -0400, Jim Rees wrote:
> >> J. Bruce Fields wrote:
> >>
> >> + if (1 != write(pipefd[1], "!", 1))
> >> + printerr(2, "weird; maybe an interrupt?");
> >>
> >> Use Yoda conditions must we?
> >
> > Yeah, yeah. How about:
> >
> > static void something_changed(void)
> > {
> > - if (1 != write(pipefd[1], "!", 1))
> > - printerr(2, "weird; maybe an interrupt?");
> > + if (write(pipefd[1], "!", 1) != 1)
> > + printerr(0, "%s writing to pipe", strerror(errno));
> > }
> >
> > ?
> Better... IMHO.. but what's going to mean when we see that in some log?

Beats me.

--b.

2012-08-01 12:36:11

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH 4/4] rpc.gssd: don't call poll() twice a second.



On 07/31/2012 06:29 PM, Chuck Lever wrote:
>
> On Jul 31, 2012, at 2:27 PM, J. Bruce Fields wrote:
>
>> On Tue, Jul 31, 2012 at 02:23:36PM -0700, Chuck Lever wrote:
>>>
>>> On Jul 31, 2012, at 2:00 PM, J. Bruce Fields wrote:
>>>
>>>> From: "J. Bruce Fields" <[email protected]>
>>>>
>>>> Use ppoll instead.
>>>>
>>>> (This breaks compatibility with kernels before 2.6.16 and glibc before
>>>> 2.4).
>>>
>>> I think I prefer seeing this wrapped with some autoconf machinery to allow nfs-utils to continue to build on earlier systems.
>>
>> I was assuming we could drop support for older systems.
>
> We went to a lot of trouble to get the IPv6 stuff to build correctly on older systems, and we still heard complaints.
>
>> If not, then I'd much prefer to go back to the self-pipe trick than to
>> deal with autoconf and ifdef'ing out all this stuff.
>
> This patch is a tiny change. I don't think autoconf in this case is onerous. I'm happy to help you with it.
I think it would be worth the effort... Let me know if I can help...

steved.

>
>> --b.
>>
>>>
>>>> Signed-off-by: J. Bruce Fields <[email protected]>
>>>> ---
>>>> utils/gssd/gssd_main_loop.c | 12 +++++-------
>>>> 1 file changed, 5 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
>>>> index 142c8c5..2d20fa2 100644
>>>> --- a/utils/gssd/gssd_main_loop.c
>>>> +++ b/utils/gssd/gssd_main_loop.c
>>>> @@ -186,13 +186,14 @@ gssd_run()
>>>> struct sigaction dn_act = {
>>>> .sa_handler = dir_notify_handler
>>>> };
>>>> - sigset_t set;
>>>> + sigset_t set, emptyset;
>>>>
>>>> sigemptyset(&dn_act.sa_mask);
>>>> sigaction(DNOTIFY_SIGNAL, &dn_act, NULL);
>>>>
>>>> - /* just in case the signal is blocked... */
>>>> + sigemptyset(&emptyset);
>>>> sigemptyset(&set);
>>>> + /* just in case the signal is blocked... */
>>>> sigaddset(&set, DNOTIFY_SIGNAL);
>>>> sigprocmask(SIG_UNBLOCK, &set, NULL);
>>>>
>>>> @@ -210,16 +211,13 @@ gssd_run()
>>>> exit(1);
>>>> }
>>>> }
>>>> - /* race condition here: dir_changed could be set before we
>>>> - * enter the poll, and we'd never notice if it weren't for the
>>>> - * timeout. */
>>>> - ret = poll(pollarray, pollsize, POLL_MILLISECS);
>>>> + ret = ppoll(pollarray, pollsize, NULL, &emptyset);
>>>> if (ret < 0) {
>>>> if (errno != EINTR)
>>>> printerr(0,
>>>> "WARNING: error return from poll\n");
>>>> } else if (ret == 0) {
>>>> - /* timeout */
>>>> + printerr(0, "WARNING: unexpected timeout\n");
>>>> } else { /* ret > 0 */
>>>> scan_poll_results(ret);
>>>> }
>>>> --
>>>> 1.7.9.5
>>>>
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>>> the body of a message to [email protected]
>>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>
>>> --
>>> Chuck Lever
>>> chuck[dot]lever[at]oracle[dot]com
>>>
>>>
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> --
> Chuck Lever
> chuck[dot]lever[at]oracle[dot]com
>
>
>

2012-08-06 14:22:28

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH 1/4] rpc.gssd: simplify signal handling



On 07/31/2012 05:00 PM, J. Bruce Fields wrote:
> From: "J. Bruce Fields" <[email protected]>
>
> We're not actually using the extra sa_sigaction parameters.
>
> Signed-off-by: J. Bruce Fields <[email protected]>
> ---
> utils/gssd/gssd_main_loop.c | 11 +++++------
> 1 file changed, 5 insertions(+), 6 deletions(-)
Committed...

steved.
>
> diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
> index c18e12c..9954ffb 100644
> --- a/utils/gssd/gssd_main_loop.c
> +++ b/utils/gssd/gssd_main_loop.c
> @@ -61,9 +61,9 @@ extern int pollsize;
>
> static volatile int dir_changed = 1;
>
> -static void dir_notify_handler(int sig, siginfo_t *si, void *data)
> +static void dir_notify_handler(int sig)
> {
> - printerr(2, "dir_notify_handler: sig %d si %p data %p\n", sig, si, data);
> + printerr(2, "dir_notify_handler: sig %d\n", sig);
>
> dir_changed = 1;
> }
> @@ -183,13 +183,12 @@ void
> gssd_run()
> {
> int ret;
> - struct sigaction dn_act;
> + struct sigaction dn_act = {
> + .sa_handler = dir_notify_handler
> + };
> sigset_t set;
>
> - /* Taken from linux/Documentation/dnotify.txt: */
> - dn_act.sa_sigaction = dir_notify_handler;
> sigemptyset(&dn_act.sa_mask);
> - dn_act.sa_flags = SA_SIGINFO;
> sigaction(DNOTIFY_SIGNAL, &dn_act, NULL);
>
> /* just in case the signal is blocked... */

2012-08-06 14:22:51

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH 2/4] rpc.gssd: don't call printerr from signal handler



On 07/31/2012 05:00 PM, J. Bruce Fields wrote:
> From: "J. Bruce Fields" <[email protected]>
>
> printerr() isn't actually safe to call from a signal handler. It might
> be possible to make it so, but I think this is the only case in
> nfs-utils where we try to, and I'm not convinced it's worth it.
>
> This fixes a bug that would eventually cause mounts to hang when gssd
> is run with -vv.
>
> Signed-off-by: J. Bruce Fields <[email protected]>
Committed...

steved.
> ---
> utils/gssd/gssd_main_loop.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
> index 9954ffb..6914687 100644
> --- a/utils/gssd/gssd_main_loop.c
> +++ b/utils/gssd/gssd_main_loop.c
> @@ -61,10 +61,8 @@ extern int pollsize;
>
> static volatile int dir_changed = 1;
>
> -static void dir_notify_handler(int sig)
> +static void dir_notify_handler(__attribute__((unused))int sig)
> {
> - printerr(2, "dir_notify_handler: sig %d\n", sig);
> -
> dir_changed = 1;
> }
>

2012-08-06 14:23:12

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH 3/4] rpc.gssd: handle error to open toplevel directory



On 07/31/2012 05:00 PM, J. Bruce Fields wrote:
> From: "J. Bruce Fields" <[email protected]>
>
> Reverse the sense of the test here, and also add debugging and cleanup
> in the error case.
>
> (Though the lack of cleanup isn't currently a problem in practice since
> we'll eventually exit in this case.)
>
> Signed-off-by: J. Bruce Fields <[email protected]>
Committed...

steved.

> ---
> utils/gssd/gssd_main_loop.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/utils/gssd/gssd_main_loop.c b/utils/gssd/gssd_main_loop.c
> index 6914687..142c8c5 100644
> --- a/utils/gssd/gssd_main_loop.c
> +++ b/utils/gssd/gssd_main_loop.c
> @@ -121,11 +121,13 @@ topdirs_add_entry(struct dirent *dent)
> }
> snprintf(tdi->dirname, PATH_MAX, "%s/%s", pipefs_dir, dent->d_name);
> tdi->fd = open(tdi->dirname, O_RDONLY);
> - if (tdi->fd != -1) {
> - fcntl(tdi->fd, F_SETSIG, DNOTIFY_SIGNAL);
> - fcntl(tdi->fd, F_NOTIFY,
> - DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT);
> + if (tdi->fd == -1) {
> + printerr(0, "ERROR: failed to open %s\n", tdi->dirname);
> + free(tdi);
> + return -1;
> }
> + fcntl(tdi->fd, F_SETSIG, DNOTIFY_SIGNAL);
> + fcntl(tdi->fd, F_NOTIFY, DN_CREATE|DN_DELETE|DN_MODIFY|DN_MULTISHOT);
>
> TAILQ_INSERT_HEAD(&topdirs_list, tdi, list);
> return 0;