If multiple processes are locking the same file, the NSMPROC_MON
message may be sent more than once while the host is not
monitored by the peer.
Add a mutex to ensure that we send it only once. If someone has already
sent the message, just wait for the result, or try again if it failed.
Signed-off-by: ZhangXiaoxu <[email protected]>
---
fs/lockd/mon.c | 10 ++++++++++
include/linux/lockd/lockd.h | 1 +
2 files changed, 11 insertions(+)
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 654594e..4ba2658 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -155,6 +155,12 @@ int nsm_monitor(const struct nlm_host *host)
if (nsm->sm_monitored)
return 0;
+ mutex_lock(&nsm->mutex);
+ if (nsm->sm_monitored) {
+ mutex_unlock(&nsm->mutex);
+ return 0;
+ }
+
/*
* Choose whether to record the caller_name or IP address of
* this peer in the local rpc.statd's database.
@@ -165,6 +171,7 @@ int nsm_monitor(const struct nlm_host *host)
if (unlikely(res.status != 0))
status = -EIO;
if (unlikely(status < 0)) {
+ mutex_unlock(&nsm->mutex);
pr_notice_ratelimited("lockd: cannot monitor %s\n", nsm->sm_name);
return status;
}
@@ -174,6 +181,8 @@ int nsm_monitor(const struct nlm_host *host)
nsm_local_state = res.state;
dprintk("lockd: NSM state changed to %d\n", nsm_local_state);
}
+
+ mutex_unlock(&nsm->mutex);
return 0;
}
@@ -284,6 +293,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
memcpy(nsm_addr(new), sap, salen);
new->sm_addrlen = salen;
nsm_init_private(new);
+ mutex_init(&new->mutex);
if (rpc_ntop(nsm_addr(new), new->sm_addrbuf,
sizeof(new->sm_addrbuf)) == 0)
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b065ef4..c56069c 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -89,6 +89,7 @@ struct nsm_handle {
char *sm_name;
struct sockaddr_storage sm_addr;
size_t sm_addrlen;
+ struct mutex mutex; /* should be send only once even more threads */
unsigned int sm_monitored : 1,
sm_sticky : 1; /* don't unmonitor */
struct nsm_private sm_priv;
--
2.7.4
> On Jan 17, 2019, at 1:15 AM, ZhangXiaoxu <[email protected]> wrote:
>
> If multiple processes are locking the same file, the NSMPROC_MON
> message would be send more than one times when the host is not
> monitored by the peer.
Hi, what is the harm of sending more than one MON request for a peer?
> Add a mutex to ensure that we just send once. If some one has send
> the msg, Just waiting for the result or try again if failed.
>
> Signed-off-by: ZhangXiaoxu <[email protected]>
> ---
> fs/lockd/mon.c | 10 ++++++++++
> include/linux/lockd/lockd.h | 1 +
> 2 files changed, 11 insertions(+)
>
> diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
> index 654594e..4ba2658 100644
> --- a/fs/lockd/mon.c
> +++ b/fs/lockd/mon.c
> @@ -155,6 +155,12 @@ int nsm_monitor(const struct nlm_host *host)
> if (nsm->sm_monitored)
> return 0;
>
> + mutex_lock(&nsm->mutex);
> + if (nsm->sm_monitored) {
> + mutex_unlock(&nsm->mutex);
> + return 0;
> + }
> +
> /*
> * Choose whether to record the caller_name or IP address of
> * this peer in the local rpc.statd's database.
> @@ -165,6 +171,7 @@ int nsm_monitor(const struct nlm_host *host)
> if (unlikely(res.status != 0))
> status = -EIO;
> if (unlikely(status < 0)) {
> + mutex_unlock(&nsm->mutex);
> pr_notice_ratelimited("lockd: cannot monitor %s\n", nsm->sm_name);
> return status;
> }
> @@ -174,6 +181,8 @@ int nsm_monitor(const struct nlm_host *host)
> nsm_local_state = res.state;
> dprintk("lockd: NSM state changed to %d\n", nsm_local_state);
> }
> +
> + mutex_unlock(&nsm->mutex);
> return 0;
> }
>
> @@ -284,6 +293,7 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
> memcpy(nsm_addr(new), sap, salen);
> new->sm_addrlen = salen;
> nsm_init_private(new);
> + mutex_init(&new->mutex);
>
> if (rpc_ntop(nsm_addr(new), new->sm_addrbuf,
> sizeof(new->sm_addrbuf)) == 0)
> diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
> index b065ef4..c56069c 100644
> --- a/include/linux/lockd/lockd.h
> +++ b/include/linux/lockd/lockd.h
> @@ -89,6 +89,7 @@ struct nsm_handle {
> char *sm_name;
> struct sockaddr_storage sm_addr;
> size_t sm_addrlen;
> + struct mutex mutex; /* should be send only once even more threads */
> unsigned int sm_monitored : 1,
> sm_sticky : 1; /* don't unmonitor */
> struct nsm_private sm_priv;
> --
> 2.7.4
>
--
Chuck Lever
[email protected]
On 1/17/2019 10:33 PM, Chuck Lever wrote:
> Hi, what is the harm of sending more than one MON request for a peer?
Maybe no harm.
The rpc.statd won't record the peer twice.
I found this when I tested the xfstest generic/089.
The RPC task for that message sometimes takes a very long time.
rpc took 57 sec who t_mtab/2377 srv rpc.statd xid 2453489031 prog statd/100024/1 proc 2 prot 6 flags 0x680
I think sending the message only once is enough.
RPC and rpc.statd may spend a long time on the network.
> On Jan 17, 2019, at 9:48 PM, zhangxiaoxu (A) <[email protected]> wrote:
>
>
>
> On 1/17/2019 10:33 PM, Chuck Lever wrote:
>> Hi, what is the harm of sending more than one MON request for a peer?
> Maybe no harm.
> The rpc.statd won't record the peer twice.
>
> I found this when I tested the xfstest generic/089.
> The rpc task for that msg sometimes take very long time.
> rpc took 57 sec who t_mtab/2377 srv rpc.statd xid 2453489031 prog statd/100024/1 proc 2 prot 6 flags 0x680
MON is supposed to be a call to a local service (on the same host).
It would be interesting if you could determine why it takes so long.
> I think the msg just send only one time is enough.
> RPC and rpc.statd maybe consume for a long time on the network.
--
Chuck Lever
[email protected]
On 1/19/2019 4:59 AM, Chuck Lever wrote:
>
>
>> On Jan 17, 2019, at 9:48 PM, zhangxiaoxu (A) <[email protected]> wrote:
>>
>>
>>
>> On 1/17/2019 10:33 PM, Chuck Lever wrote:
>>> Hi, what is the harm of sending more than one MON request for a peer?
>> Maybe no harm.
>> The rpc.statd won't record the peer twice.
>>
>> I found this when I tested the xfstest generic/089.
>> The rpc task for that msg sometimes take very long time.
>> rpc took 57 sec who t_mtab/2377 srv rpc.statd xid 2453489031 prog statd/100024/1 proc 2 prot 6 flags 0x680
>
> MON is supposed to be a call to a local service (on the same host).
> It would be interesting if you could determine why it takes so long.
>
Yes, I'm working on that.
I think this is a possible optimization.
Why do we have to send multiple requests?
>
>> I think the msg just send only one time is enough.
>> RPC and rpc.statd maybe consume for a long time on the network.
>
>
> --
> Chuck Lever
> [email protected]
>
>
>
>
>
> On Jan 24, 2019, at 4:29 AM, zhangxiaoxu (A) <[email protected]> wrote:
>
>
>
> On 1/19/2019 4:59 AM, Chuck Lever wrote:
>>> On Jan 17, 2019, at 9:48 PM, zhangxiaoxu (A) <[email protected]> wrote:
>>>
>>>
>>>
>>> On 1/17/2019 10:33 PM, Chuck Lever wrote:
>>>> Hi, what is the harm of sending more than one MON request for a peer?
>>> Maybe no harm.
>>> The rpc.statd won't record the peer twice.
>>>
>>> I found this when I tested the xfstest generic/089.
>>> The rpc task for that msg sometimes take very long time.
>>> rpc took 57 sec who t_mtab/2377 srv rpc.statd xid 2453489031 prog statd/100024/1 proc 2 prot 6 flags 0x680
>> MON is supposed to be a call to a local service (on the same host).
>> It would be interesting if you could determine why it takes so long.
> Yes, I'm working for that.
> I think this is an optimization point.
> Why do we have to send multiple requests?
I agree that multiple requests are unnecessary, but
they are harmless too. I don't think it's worth the
trouble to change this behavior.
The problem is that MON requests on your system take too long.
>>> I think the msg just send only one time is enough.
>>> RPC and rpc.statd maybe consume for a long time on the network.
>> --
>> Chuck Lever
>> [email protected]
--
Chuck Lever
[email protected]
On Thu, 2019-01-24 at 11:17 -0800, Chuck Lever wrote:
> > On Jan 24, 2019, at 4:29 AM, zhangxiaoxu (A) <
> > [email protected]> wrote:
> >
> >
> >
> > On 1/19/2019 4:59 AM, Chuck Lever wrote:
> > > > On Jan 17, 2019, at 9:48 PM, zhangxiaoxu (A) <
> > > > [email protected]> wrote:
> > > >
> > > >
> > > >
> > > > On 1/17/2019 10:33 PM, Chuck Lever wrote:
> > > > > Hi, what is the harm of sending more than one MON request for
> > > > > a peer?
> > > > Maybe no harm.
> > > > The rpc.statd won't record the peer twice.
> > > >
> > > > I found this when I tested the xfstest generic/089.
> > > > The rpc task for that msg sometimes take very long time.
> > > > rpc took 57 sec who t_mtab/2377 srv rpc.statd xid 2453489031
> > > > prog statd/100024/1 proc 2 prot 6 flags 0x680
> > > MON is supposed to be a call to a local service (on the same
> > > host).
> > > It would be interesting if you could determine why it takes so
> > > long.
> > Yes, I'm working for that.
> > I think this is an optimization point.
> > Why do we have to send multiple requests?
>
> I agree that multiple requests are unnecessary, but
> they are harmless too. I don't think it's worth the
> trouble to change this behavior.
>
> The problem is MON requests on your system takes too long.
57 seconds is pretty much unheard of. Is rpc.statd correctly registered
with rpcbind, and is the port that rpc.statd is listening on open to
connections from localhost (i.e. no firewall blockage)?
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]