Currently my_svc_run does not handle poll time allowing idle TCP
connections to remain ESTABLISHED indefinitely. When the number
of connections reaches the limit the open file descriptors
(ulimit -n) then accept(2) fails with EMFILE. Since libtirpc does
not handle EMFILE returned from accept(2) this get my_svc_run into
a tight loop calling accept(2) resulting in the RPC service being
down, it's no longer able to service any requests.
Fix by removing idle connections when select(2) times out in my_svc_run
and when open(2) returns EMFILE/ENFILE in auth_reload.
Signed-off-by: [email protected]
---
support/export/auth.c | 12 ++++++++++--
utils/mountd/svc_run.c | 10 ++++++++--
utils/statd/svc_run.c | 11 ++++++++---
3 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/support/export/auth.c b/support/export/auth.c
index 03ce4b8a0e1e..0bb189fb4037 100644
--- a/support/export/auth.c
+++ b/support/export/auth.c
@@ -81,6 +81,8 @@ check_useipaddr(void)
cache_flush();
}
+extern void __svc_destroy_idle(int, bool_t);
+
unsigned int
auth_reload()
{
@@ -91,8 +93,14 @@ auth_reload()
int fd;
if ((fd = open(etab.statefn, O_RDONLY)) < 0) {
- xlog(L_FATAL, "couldn't open %s", etab.statefn);
- } else if (fstat(fd, &stb) < 0) {
+ if (errno != EMFILE && errno != ENFILE)
+ xlog(L_FATAL, "couldn't open %s", etab.statefn);
+ /* remove idle */
+ __svc_destroy_idle(5, FALSE);
+ if ((fd = open(etab.statefn, O_RDONLY)) < 0)
+ xlog(L_FATAL, "couldn't open %s", etab.statefn);
+ }
+ if (fstat(fd, &stb) < 0) {
xlog(L_FATAL, "couldn't stat %s", etab.statefn);
close(fd);
} else if (last_fd != -1 && stb.st_ino == last_inode) {
diff --git a/utils/mountd/svc_run.c b/utils/mountd/svc_run.c
index 167b9757bde2..ada8d0ac8844 100644
--- a/utils/mountd/svc_run.c
+++ b/utils/mountd/svc_run.c
@@ -59,6 +59,7 @@
#include "export.h"
void my_svc_run(void);
+extern void __svc_destroy_idle(int , bool_t);
#if defined(__GLIBC__) && LONG_MAX != INT_MAX
/* bug in glibc 2.3.6 and earlier, we need
@@ -95,6 +96,7 @@ my_svc_run(void)
{
fd_set readfds;
int selret;
+ struct timeval tv;
for (;;) {
@@ -102,8 +104,10 @@ my_svc_run(void)
cache_set_fds(&readfds);
v4clients_set_fds(&readfds);
+ tv.tv_sec = 30;
+ tv.tv_usec = 0;
selret = select(FD_SETSIZE, &readfds,
- (void *) 0, (void *) 0, (struct timeval *) 0);
+ (void *) 0, (void *) 0, &tv);
switch (selret) {
@@ -113,7 +117,9 @@ my_svc_run(void)
continue;
xlog(L_ERROR, "my_svc_run() - select: %m");
return;
-
+ case 0:
+ __svc_destroy_idle(30, FALSE);
+ continue;
default:
selret -= cache_process_req(&readfds);
selret -= v4clients_process(&readfds);
diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c
index e343c7689860..8888788c81d0 100644
--- a/utils/statd/svc_run.c
+++ b/utils/statd/svc_run.c
@@ -59,6 +59,7 @@
void my_svc_exit(void);
static int svc_stop = 0;
+extern void __svc_destroy_idle(int , bool_t);
/*
* This is the global notify list onto which all SM_NOTIFY and CALLBACK
@@ -85,6 +86,7 @@ my_svc_run(int sockfd)
FD_SET_TYPE readfds;
int selret;
time_t now;
+ struct timeval tv;
svc_stop = 0;
@@ -101,7 +103,6 @@ my_svc_run(int sockfd)
/* Set notify sockfd for waiting for reply */
FD_SET(sockfd, &readfds);
if (notify) {
- struct timeval tv;
tv.tv_sec = NL_WHEN(notify) - now;
tv.tv_usec = 0;
@@ -111,8 +112,10 @@ my_svc_run(int sockfd)
(void *) 0, (void *) 0, &tv);
} else {
xlog(D_GENERAL, "Waiting for client connections");
+ tv.tv_sec = 30;
+ tv.tv_usec = 0;
selret = select(FD_SETSIZE, &readfds,
- (void *) 0, (void *) 0, (struct timeval *) 0);
+ (void *) 0, (void *) 0, &tv);
}
switch (selret) {
@@ -124,7 +127,9 @@ my_svc_run(int sockfd)
return;
case 0:
- /* A notify/callback timed out. */
+ /* A notify/callback/wait for client timed out. */
+ if (!notify)
+ __svc_destroy_idle(30, FALSE);
continue;
default:
--
2.26.2
Hello,
On 8/7/21 1:02 PM, Dai Ngo wrote:
> Currently my_svc_run does not handle poll time allowing idle TCP
> connections to remain ESTABLISHED indefinitely. When the number
> of connections reaches the limit the open file descriptors
> (ulimit -n) then accept(2) fails with EMFILE. Since libtirpc does
> not handle EMFILE returned from accept(2) this get my_svc_run into
> a tight loop calling accept(2) resulting in the RPC service being
> down, it's no longer able to service any requests.
>
> Fix by removing idle connections when select(2) times out in my_svc_run
> and when open(2) returns EMFILE/ENFILE in auth_reload.
>
> Signed-off-by: [email protected]
> ---
> support/export/auth.c | 12 ++++++++++--
> utils/mountd/svc_run.c | 10 ++++++++--
> utils/statd/svc_run.c | 11 ++++++++---
> 3 files changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/support/export/auth.c b/support/export/auth.c
> index 03ce4b8a0e1e..0bb189fb4037 100644
> --- a/support/export/auth.c
> +++ b/support/export/auth.c
> @@ -81,6 +81,8 @@ check_useipaddr(void)
> cache_flush();
> }
>
> +extern void __svc_destroy_idle(int, bool_t);
This is adding to the API... Which means mountd
and statd (the next patch) will not compile without
this new API...
Does this mean an SONAME change? That is such a pain!
steved.
> +
> unsigned int
> auth_reload()
> {
> @@ -91,8 +93,14 @@ auth_reload()
> int fd;
>
> if ((fd = open(etab.statefn, O_RDONLY)) < 0) {
> - xlog(L_FATAL, "couldn't open %s", etab.statefn);
> - } else if (fstat(fd, &stb) < 0) {
> + if (errno != EMFILE && errno != ENFILE)
> + xlog(L_FATAL, "couldn't open %s", etab.statefn);
> + /* remove idle */
> + __svc_destroy_idle(5, FALSE);
> + if ((fd = open(etab.statefn, O_RDONLY)) < 0)
> + xlog(L_FATAL, "couldn't open %s", etab.statefn);
> + }
> + if (fstat(fd, &stb) < 0) {
> xlog(L_FATAL, "couldn't stat %s", etab.statefn);
> close(fd);
> } else if (last_fd != -1 && stb.st_ino == last_inode) {
> diff --git a/utils/mountd/svc_run.c b/utils/mountd/svc_run.c
> index 167b9757bde2..ada8d0ac8844 100644
> --- a/utils/mountd/svc_run.c
> +++ b/utils/mountd/svc_run.c
> @@ -59,6 +59,7 @@
> #include "export.h"
>
> void my_svc_run(void);
> +extern void __svc_destroy_idle(int , bool_t);
>
> #if defined(__GLIBC__) && LONG_MAX != INT_MAX
> /* bug in glibc 2.3.6 and earlier, we need
> @@ -95,6 +96,7 @@ my_svc_run(void)
> {
> fd_set readfds;
> int selret;
> + struct timeval tv;
>
> for (;;) {
>
> @@ -102,8 +104,10 @@ my_svc_run(void)
> cache_set_fds(&readfds);
> v4clients_set_fds(&readfds);
>
> + tv.tv_sec = 30;
> + tv.tv_usec = 0;
> selret = select(FD_SETSIZE, &readfds,
> - (void *) 0, (void *) 0, (struct timeval *) 0);
> + (void *) 0, (void *) 0, &tv);
>
>
> switch (selret) {
> @@ -113,7 +117,9 @@ my_svc_run(void)
> continue;
> xlog(L_ERROR, "my_svc_run() - select: %m");
> return;
> -
> + case 0:
> + __svc_destroy_idle(30, FALSE);
> + continue;
> default:
> selret -= cache_process_req(&readfds);
> selret -= v4clients_process(&readfds);
> diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c
> index e343c7689860..8888788c81d0 100644
> --- a/utils/statd/svc_run.c
> +++ b/utils/statd/svc_run.c
> @@ -59,6 +59,7 @@
>
> void my_svc_exit(void);
> static int svc_stop = 0;
> +extern void __svc_destroy_idle(int , bool_t);
>
> /*
> * This is the global notify list onto which all SM_NOTIFY and CALLBACK
> @@ -85,6 +86,7 @@ my_svc_run(int sockfd)
> FD_SET_TYPE readfds;
> int selret;
> time_t now;
> + struct timeval tv;
>
> svc_stop = 0;
>
> @@ -101,7 +103,6 @@ my_svc_run(int sockfd)
> /* Set notify sockfd for waiting for reply */
> FD_SET(sockfd, &readfds);
> if (notify) {
> - struct timeval tv;
>
> tv.tv_sec = NL_WHEN(notify) - now;
> tv.tv_usec = 0;
> @@ -111,8 +112,10 @@ my_svc_run(int sockfd)
> (void *) 0, (void *) 0, &tv);
> } else {
> xlog(D_GENERAL, "Waiting for client connections");
> + tv.tv_sec = 30;
> + tv.tv_usec = 0;
> selret = select(FD_SETSIZE, &readfds,
> - (void *) 0, (void *) 0, (struct timeval *) 0);
> + (void *) 0, (void *) 0, &tv);
> }
>
> switch (selret) {
> @@ -124,7 +127,9 @@ my_svc_run(int sockfd)
> return;
>
> case 0:
> - /* A notify/callback timed out. */
> + /* A notify/callback/wait for client timed out. */
> + if (!notify)
> + __svc_destroy_idle(30, FALSE);
> continue;
>
> default:
>
On 8/8/21 9:56 AM, Steve Dickson wrote:
> Hello,
>
> On 8/7/21 1:02 PM, Dai Ngo wrote:
>> Currently my_svc_run does not handle poll time allowing idle TCP
>> connections to remain ESTABLISHED indefinitely. When the number
>> of connections reaches the limit the open file descriptors
>> (ulimit -n) then accept(2) fails with EMFILE. Since libtirpc does
>> not handle EMFILE returned from accept(2) this get my_svc_run into
>> a tight loop calling accept(2) resulting in the RPC service being
>> down, it's no longer able to service any requests.
>>
>> Fix by removing idle connections when select(2) times out in my_svc_run
>> and when open(2) returns EMFILE/ENFILE in auth_reload.
>>
>> Signed-off-by: [email protected]
>> ---
>> support/export/auth.c | 12 ++++++++++--
>> utils/mountd/svc_run.c | 10 ++++++++--
>> utils/statd/svc_run.c | 11 ++++++++---
>> 3 files changed, 26 insertions(+), 7 deletions(-)
>>
>> diff --git a/support/export/auth.c b/support/export/auth.c
>> index 03ce4b8a0e1e..0bb189fb4037 100644
>> --- a/support/export/auth.c
>> +++ b/support/export/auth.c
>> @@ -81,6 +81,8 @@ check_useipaddr(void)
>> cache_flush();
>> }
>> +extern void __svc_destroy_idle(int, bool_t);
> This is adding to the API... Which means mountd
> and statd (the next patch) will not compile without
> this new API...
Yes, both this patch and the patch for rpcbind require the libtirpc patch.
>
> Does this mean an SONAME change? That is such a pain!
Do you have any suggestions to make this less painful?
-Dai
>
> steved.
>
>> +
>> unsigned int
>> auth_reload()
>> {
>> @@ -91,8 +93,14 @@ auth_reload()
>> int fd;
>> if ((fd = open(etab.statefn, O_RDONLY)) < 0) {
>> - xlog(L_FATAL, "couldn't open %s", etab.statefn);
>> - } else if (fstat(fd, &stb) < 0) {
>> + if (errno != EMFILE && errno != ENFILE)
>> + xlog(L_FATAL, "couldn't open %s", etab.statefn);
>> + /* remove idle */
>> + __svc_destroy_idle(5, FALSE);
>> + if ((fd = open(etab.statefn, O_RDONLY)) < 0)
>> + xlog(L_FATAL, "couldn't open %s", etab.statefn);
>> + }
>> + if (fstat(fd, &stb) < 0) {
>> xlog(L_FATAL, "couldn't stat %s", etab.statefn);
>> close(fd);
>> } else if (last_fd != -1 && stb.st_ino == last_inode) {
>> diff --git a/utils/mountd/svc_run.c b/utils/mountd/svc_run.c
>> index 167b9757bde2..ada8d0ac8844 100644
>> --- a/utils/mountd/svc_run.c
>> +++ b/utils/mountd/svc_run.c
>> @@ -59,6 +59,7 @@
>> #include "export.h"
>> void my_svc_run(void);
>> +extern void __svc_destroy_idle(int , bool_t);
>> #if defined(__GLIBC__) && LONG_MAX != INT_MAX
>> /* bug in glibc 2.3.6 and earlier, we need
>> @@ -95,6 +96,7 @@ my_svc_run(void)
>> {
>> fd_set readfds;
>> int selret;
>> + struct timeval tv;
>> for (;;) {
>> @@ -102,8 +104,10 @@ my_svc_run(void)
>> cache_set_fds(&readfds);
>> v4clients_set_fds(&readfds);
>> + tv.tv_sec = 30;
>> + tv.tv_usec = 0;
>> selret = select(FD_SETSIZE, &readfds,
>> - (void *) 0, (void *) 0, (struct timeval *) 0);
>> + (void *) 0, (void *) 0, &tv);
>> switch (selret) {
>> @@ -113,7 +117,9 @@ my_svc_run(void)
>> continue;
>> xlog(L_ERROR, "my_svc_run() - select: %m");
>> return;
>> -
>> + case 0:
>> + __svc_destroy_idle(30, FALSE);
>> + continue;
>> default:
>> selret -= cache_process_req(&readfds);
>> selret -= v4clients_process(&readfds);
>> diff --git a/utils/statd/svc_run.c b/utils/statd/svc_run.c
>> index e343c7689860..8888788c81d0 100644
>> --- a/utils/statd/svc_run.c
>> +++ b/utils/statd/svc_run.c
>> @@ -59,6 +59,7 @@
>> void my_svc_exit(void);
>> static int svc_stop = 0;
>> +extern void __svc_destroy_idle(int , bool_t);
>> /*
>> * This is the global notify list onto which all SM_NOTIFY and
>> CALLBACK
>> @@ -85,6 +86,7 @@ my_svc_run(int sockfd)
>> FD_SET_TYPE readfds;
>> int selret;
>> time_t now;
>> + struct timeval tv;
>> svc_stop = 0;
>> @@ -101,7 +103,6 @@ my_svc_run(int sockfd)
>> /* Set notify sockfd for waiting for reply */
>> FD_SET(sockfd, &readfds);
>> if (notify) {
>> - struct timeval tv;
>> tv.tv_sec = NL_WHEN(notify) - now;
>> tv.tv_usec = 0;
>> @@ -111,8 +112,10 @@ my_svc_run(int sockfd)
>> (void *) 0, (void *) 0, &tv);
>> } else {
>> xlog(D_GENERAL, "Waiting for client connections");
>> + tv.tv_sec = 30;
>> + tv.tv_usec = 0;
>> selret = select(FD_SETSIZE, &readfds,
>> - (void *) 0, (void *) 0, (struct timeval *) 0);
>> + (void *) 0, (void *) 0, &tv);
>> }
>> switch (selret) {
>> @@ -124,7 +127,9 @@ my_svc_run(int sockfd)
>> return;
>> case 0:
>> - /* A notify/callback timed out. */
>> + /* A notify/callback/wait for client timed out. */
>> + if (!notify)
>> + __svc_destroy_idle(30, FALSE);
>> continue;
>> default:
>>
>
* Steve Dickson:
> Hello,
>
> On 8/7/21 1:02 PM, Dai Ngo wrote:
>> Currently my_svc_run does not handle poll time allowing idle TCP
>> connections to remain ESTABLISHED indefinitely. When the number
>> of connections reaches the limit the open file descriptors
>> (ulimit -n) then accept(2) fails with EMFILE. Since libtirpc does
>> not handle EMFILE returned from accept(2) this get my_svc_run into
>> a tight loop calling accept(2) resulting in the RPC service being
>> down, it's no longer able to service any requests.
>> Fix by removing idle connections when select(2) times out in
>> my_svc_run
>> and when open(2) returns EMFILE/ENFILE in auth_reload.
>> Signed-off-by: [email protected]
>> ---
>> support/export/auth.c | 12 ++++++++++--
>> utils/mountd/svc_run.c | 10 ++++++++--
>> utils/statd/svc_run.c | 11 ++++++++---
>> 3 files changed, 26 insertions(+), 7 deletions(-)
>> diff --git a/support/export/auth.c b/support/export/auth.c
>> index 03ce4b8a0e1e..0bb189fb4037 100644
>> --- a/support/export/auth.c
>> +++ b/support/export/auth.c
>> @@ -81,6 +81,8 @@ check_useipaddr(void)
>> cache_flush();
>> }
>> +extern void __svc_destroy_idle(int, bool_t);
> This is adding to the API... Which means mountd
> and statd (the next patch) will not compile without
> this new API...
>
> Does this mean an SONAME change? That is such a pain!
Do you symbol versioning? For RPM-based distributions, adding the new
symbol under a new symbol version would avoid the need for a SONAME
change.
Debian-based distributions use explicit symbol list files and are more
flexible.
Thanks,
Florian