2022-07-20 21:23:10

by Attila Kovacs

[permalink] [raw]
Subject: [PATCH] Eliminate deadlocks in connects with an MT environment

In cnlt_dg_freeres() and clnt_vc_freeres(), cond_signal() is called after
unlocking the mutex (clnt_fd_lock). The manual of pthread_cond_signal()
allows that, but mentions that for consistent scheduling, cond_signal()
should be called with the waiting mutex locked.

clnt_fd_lock is locked on L171, but then not released if jumping to the
err1 label on an error (L175 and L180). This means that those errors
will deadlock all further operations that require clnt_fd_lock access.

Same in clnt_vc.c in clnt_vc_create, on lines 215, 222, and 230 respectively.

Signed-off-by: Steve Dickson <[email protected]>
---
[Reposting Attila's patch with the proper Signed-off-by and format]

src/clnt_dg.c | 9 ++++++---
src/clnt_vc.c | 12 ++++++++----
2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/clnt_dg.c b/src/clnt_dg.c
index b3d82e7..7c5d22e 100644
--- a/src/clnt_dg.c
+++ b/src/clnt_dg.c
@@ -101,9 +101,9 @@ extern mutex_t clnt_fd_lock;
#define release_fd_lock(fd_lock, mask) { \
mutex_lock(&clnt_fd_lock); \
fd_lock->active = FALSE; \
- mutex_unlock(&clnt_fd_lock); \
thr_sigsetmask(SIG_SETMASK, &(mask), NULL); \
cond_signal(&fd_lock->cv); \
+ mutex_unlock(&clnt_fd_lock); \
}

static const char mem_err_clnt_dg[] = "clnt_dg_create: out of memory";
@@ -172,12 +172,15 @@ clnt_dg_create(fd, svcaddr, program, version, sendsz, recvsz)
if (dg_fd_locks == (fd_locks_t *) NULL) {
dg_fd_locks = fd_locks_init();
if (dg_fd_locks == (fd_locks_t *) NULL) {
+ mutex_unlock(&clnt_fd_lock);
goto err1;
}
}
fd_lock = fd_lock_create(fd, dg_fd_locks);
- if (fd_lock == (fd_lock_t *) NULL)
+ if (fd_lock == (fd_lock_t *) NULL) {
+ mutex_unlock(&clnt_fd_lock);
goto err1;
+ }

mutex_unlock(&clnt_fd_lock);
thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
@@ -573,9 +576,9 @@ clnt_dg_freeres(cl, xdr_res, res_ptr)
cu->cu_fd_lock->active = TRUE;
xdrs->x_op = XDR_FREE;
dummy = (*xdr_res)(xdrs, res_ptr);
- mutex_unlock(&clnt_fd_lock);
thr_sigsetmask(SIG_SETMASK, &mask, NULL);
cond_signal(&cu->cu_fd_lock->cv);
+ mutex_unlock(&clnt_fd_lock);
return (dummy);
}

diff --git a/src/clnt_vc.c b/src/clnt_vc.c
index a07e297..3c73e65 100644
--- a/src/clnt_vc.c
+++ b/src/clnt_vc.c
@@ -153,9 +153,9 @@ extern mutex_t clnt_fd_lock;
#define release_fd_lock(fd_lock, mask) { \
mutex_lock(&clnt_fd_lock); \
fd_lock->active = FALSE; \
- mutex_unlock(&clnt_fd_lock); \
thr_sigsetmask(SIG_SETMASK, &(mask), (sigset_t *) NULL); \
cond_signal(&fd_lock->cv); \
+ mutex_unlock(&clnt_fd_lock); \
}

static const char clnt_vc_errstr[] = "%s : %s";
@@ -216,7 +216,9 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
if (vc_fd_locks == (fd_locks_t *) NULL) {
vc_fd_locks = fd_locks_init();
if (vc_fd_locks == (fd_locks_t *) NULL) {
- struct rpc_createerr *ce = &get_rpc_createerr();
+ struct rpc_createerr *ce;
+ mutex_unlock(&clnt_fd_lock);
+ ce = &get_rpc_createerr();
ce->cf_stat = RPC_SYSTEMERROR;
ce->cf_error.re_errno = errno;
goto err;
@@ -224,7 +226,9 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
}
fd_lock = fd_lock_create(fd, vc_fd_locks);
if (fd_lock == (fd_lock_t *) NULL) {
- struct rpc_createerr *ce = &get_rpc_createerr();
+ struct rpc_createerr *ce;
+ mutex_unlock(&clnt_fd_lock);
+ ce = &get_rpc_createerr();
ce->cf_stat = RPC_SYSTEMERROR;
ce->cf_error.re_errno = errno;
goto err;
@@ -495,9 +499,9 @@ clnt_vc_freeres(cl, xdr_res, res_ptr)
cond_wait(&ct->ct_fd_lock->cv, &clnt_fd_lock);
xdrs->x_op = XDR_FREE;
dummy = (*xdr_res)(xdrs, res_ptr);
- mutex_unlock(&clnt_fd_lock);
thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
cond_signal(&ct->ct_fd_lock->cv);
+ mutex_unlock(&clnt_fd_lock);

return dummy;
}
--
2.36.1


2022-07-28 13:27:49

by Steve Dickson

[permalink] [raw]
Subject: Re: [PATCH] Eliminate deadlocks in connects with an MT environment



On 7/20/22 5:20 PM, Attila Kovacs wrote:
> In cnlt_dg_freeres() and clnt_vc_freeres(), cond_signal() is called after
> unlocking the mutex (clnt_fd_lock). The manual of pthread_cond_signal()
> allows that, but mentions that for consistent scheduling, cond_signal()
> should be called with the waiting mutex locked.
>
> clnt_fd_lock is locked on L171, but then not released if jumping to the
> err1 label on an error (L175 and L180). This means that those errors
> will deadlock all further operations that require clnt_fd_lock access.
>
> Same in clnt_vc.c in clnt_vc_create, on lines 215, 222, and 230 respectively.
>
> Signed-off-by: Steve Dickson <[email protected]>
Committed (tag: libtirpc-1-3-3-rc4)

steved.

> ---
> [Reposting Attila's patch with the proper Signed-off-by and format]
>
> src/clnt_dg.c | 9 ++++++---
> src/clnt_vc.c | 12 ++++++++----
> 2 files changed, 14 insertions(+), 7 deletions(-)
>
> diff --git a/src/clnt_dg.c b/src/clnt_dg.c
> index b3d82e7..7c5d22e 100644
> --- a/src/clnt_dg.c
> +++ b/src/clnt_dg.c
> @@ -101,9 +101,9 @@ extern mutex_t clnt_fd_lock;
> #define release_fd_lock(fd_lock, mask) { \
> mutex_lock(&clnt_fd_lock); \
> fd_lock->active = FALSE; \
> - mutex_unlock(&clnt_fd_lock); \
> thr_sigsetmask(SIG_SETMASK, &(mask), NULL); \
> cond_signal(&fd_lock->cv); \
> + mutex_unlock(&clnt_fd_lock); \
> }
>
> static const char mem_err_clnt_dg[] = "clnt_dg_create: out of memory";
> @@ -172,12 +172,15 @@ clnt_dg_create(fd, svcaddr, program, version, sendsz, recvsz)
> if (dg_fd_locks == (fd_locks_t *) NULL) {
> dg_fd_locks = fd_locks_init();
> if (dg_fd_locks == (fd_locks_t *) NULL) {
> + mutex_unlock(&clnt_fd_lock);
> goto err1;
> }
> }
> fd_lock = fd_lock_create(fd, dg_fd_locks);
> - if (fd_lock == (fd_lock_t *) NULL)
> + if (fd_lock == (fd_lock_t *) NULL) {
> + mutex_unlock(&clnt_fd_lock);
> goto err1;
> + }
>
> mutex_unlock(&clnt_fd_lock);
> thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
> @@ -573,9 +576,9 @@ clnt_dg_freeres(cl, xdr_res, res_ptr)
> cu->cu_fd_lock->active = TRUE;
> xdrs->x_op = XDR_FREE;
> dummy = (*xdr_res)(xdrs, res_ptr);
> - mutex_unlock(&clnt_fd_lock);
> thr_sigsetmask(SIG_SETMASK, &mask, NULL);
> cond_signal(&cu->cu_fd_lock->cv);
> + mutex_unlock(&clnt_fd_lock);
> return (dummy);
> }
>
> diff --git a/src/clnt_vc.c b/src/clnt_vc.c
> index a07e297..3c73e65 100644
> --- a/src/clnt_vc.c
> +++ b/src/clnt_vc.c
> @@ -153,9 +153,9 @@ extern mutex_t clnt_fd_lock;
> #define release_fd_lock(fd_lock, mask) { \
> mutex_lock(&clnt_fd_lock); \
> fd_lock->active = FALSE; \
> - mutex_unlock(&clnt_fd_lock); \
> thr_sigsetmask(SIG_SETMASK, &(mask), (sigset_t *) NULL); \
> cond_signal(&fd_lock->cv); \
> + mutex_unlock(&clnt_fd_lock); \
> }
>
> static const char clnt_vc_errstr[] = "%s : %s";
> @@ -216,7 +216,9 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
> if (vc_fd_locks == (fd_locks_t *) NULL) {
> vc_fd_locks = fd_locks_init();
> if (vc_fd_locks == (fd_locks_t *) NULL) {
> - struct rpc_createerr *ce = &get_rpc_createerr();
> + struct rpc_createerr *ce;
> + mutex_unlock(&clnt_fd_lock);
> + ce = &get_rpc_createerr();
> ce->cf_stat = RPC_SYSTEMERROR;
> ce->cf_error.re_errno = errno;
> goto err;
> @@ -224,7 +226,9 @@ clnt_vc_create(fd, raddr, prog, vers, sendsz, recvsz)
> }
> fd_lock = fd_lock_create(fd, vc_fd_locks);
> if (fd_lock == (fd_lock_t *) NULL) {
> - struct rpc_createerr *ce = &get_rpc_createerr();
> + struct rpc_createerr *ce;
> + mutex_unlock(&clnt_fd_lock);
> + ce = &get_rpc_createerr();
> ce->cf_stat = RPC_SYSTEMERROR;
> ce->cf_error.re_errno = errno;
> goto err;
> @@ -495,9 +499,9 @@ clnt_vc_freeres(cl, xdr_res, res_ptr)
> cond_wait(&ct->ct_fd_lock->cv, &clnt_fd_lock);
> xdrs->x_op = XDR_FREE;
> dummy = (*xdr_res)(xdrs, res_ptr);
> - mutex_unlock(&clnt_fd_lock);
> thr_sigsetmask(SIG_SETMASK, &(mask), NULL);
> cond_signal(&ct->ct_fd_lock->cv);
> + mutex_unlock(&clnt_fd_lock);
>
> return dummy;
> }