Even though there is a WARN_ON_ONCE check, it seems possible for
nfs4_find_file to race with the destruction of an fi_deleg_file while
trying to take a reference to it.
put_deleg_file is done while holding the fi_lock. Take and hold it
when dealing with the fi_deleg_file in nfs4_find_file.
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b68238024e49..3df3ae84bd07 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
static struct nfsd_file *
nfs4_find_file(struct nfs4_stid *s, int flags)
{
+ struct nfsd_file *ret = NULL;
+
if (!s)
return NULL;
switch (s->sc_type) {
case NFS4_DELEG_STID:
- if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
- return NULL;
- return nfsd_file_get(s->sc_file->fi_deleg_file);
+ spin_lock(&s->sc_file->fi_lock);
+ if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
+ ret = nfsd_file_get(s->sc_file->fi_deleg_file);
+ spin_unlock(&s->sc_file->fi_lock);
+ break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
if (flags & RD_STATE)
- return find_readable_file(s->sc_file);
+ ret = find_readable_file(s->sc_file);
else
- return find_writeable_file(s->sc_file);
+ ret = find_writeable_file(s->sc_file);
}
- return NULL;
+ return ret;
}
static __be32
--
2.39.0
> On Jan 5, 2023, at 7:18 AM, Jeff Layton <[email protected]> wrote:
>
> Even though there is a WARN_ON_ONCE check, it seems possible for
> nfs4_find_file to race with the destruction of an fi_deleg_file while
> trying to take a reference to it.
>
> put_deleg_file is done while holding the fi_lock. Take and hold it
> when dealing with the fi_deleg_file in nfs4_find_file.
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 16 ++++++++++------
> 1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index b68238024e49..3df3ae84bd07 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> static struct nfsd_file *
> nfs4_find_file(struct nfs4_stid *s, int flags)
> {
> + struct nfsd_file *ret = NULL;
> +
> if (!s)
> return NULL;
>
> switch (s->sc_type) {
> case NFS4_DELEG_STID:
> - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> - return NULL;
> - return nfsd_file_get(s->sc_file->fi_deleg_file);
> + spin_lock(&s->sc_file->fi_lock);
> + if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
You'd think this would be a really really hard race to hit.
What I'm wondering, though, is whether the WARN_ON_ONCE should
be dropped by this patch. I've never seen it fire.
> + ret = nfsd_file_get(s->sc_file->fi_deleg_file);
> + spin_unlock(&s->sc_file->fi_lock);
> + break;
> case NFS4_OPEN_STID:
> case NFS4_LOCK_STID:
> if (flags & RD_STATE)
> - return find_readable_file(s->sc_file);
> + ret = find_readable_file(s->sc_file);
> else
> - return find_writeable_file(s->sc_file);
> + ret = find_writeable_file(s->sc_file);
> }
>
> - return NULL;
> + return ret;
> }
>
> static __be32
> --
> 2.39.0
>
--
Chuck Lever
On Thu, 2023-01-05 at 14:46 +0000, Chuck Lever III wrote:
>
> > On Jan 5, 2023, at 7:18 AM, Jeff Layton <[email protected]> wrote:
> >
> > Even though there is a WARN_ON_ONCE check, it seems possible for
> > nfs4_find_file to race with the destruction of an fi_deleg_file while
> > trying to take a reference to it.
> >
> > put_deleg_file is done while holding the fi_lock. Take and hold it
> > when dealing with the fi_deleg_file in nfs4_find_file.
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> > fs/nfsd/nfs4state.c | 16 ++++++++++------
> > 1 file changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index b68238024e49..3df3ae84bd07 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > static struct nfsd_file *
> > nfs4_find_file(struct nfs4_stid *s, int flags)
> > {
> > + struct nfsd_file *ret = NULL;
> > +
> > if (!s)
> > return NULL;
> >
> > switch (s->sc_type) {
> > case NFS4_DELEG_STID:
> > - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> > - return NULL;
> > - return nfsd_file_get(s->sc_file->fi_deleg_file);
> > + spin_lock(&s->sc_file->fi_lock);
> > + if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
>
> You'd think this would be a really really hard race to hit.
>
> What I'm wondering, though, is whether the WARN_ON_ONCE should
> be dropped by this patch. I've never seen it fire.
>
>
I have:
https://bugzilla.redhat.com/show_bug.cgi?id=1997177
It's possible though that those WARNs are fallout from other bugs in the
delegation handling, but it's hard to know for sure. I think we ought to
keep it there for now.
> > + ret = nfsd_file_get(s->sc_file->fi_deleg_file);
> > + spin_unlock(&s->sc_file->fi_lock);
> > + break;
> > case NFS4_OPEN_STID:
> > case NFS4_LOCK_STID:
> > if (flags & RD_STATE)
> > - return find_readable_file(s->sc_file);
> > + ret = find_readable_file(s->sc_file);
> > else
> > - return find_writeable_file(s->sc_file);
> > + ret = find_writeable_file(s->sc_file);
> > }
> >
> > - return NULL;
> > + return ret;
> > }
> >
> > static __be32
> > --
> > 2.39.0
> >
>
> --
> Chuck Lever
>
>
>
--
Jeff Layton <[email protected]>
> On Jan 5, 2023, at 3:43 PM, Jeff Layton <[email protected]> wrote:
>
> On Thu, 2023-01-05 at 14:46 +0000, Chuck Lever III wrote:
>>
>>> On Jan 5, 2023, at 7:18 AM, Jeff Layton <[email protected]> wrote:
>>>
>>> Even though there is a WARN_ON_ONCE check, it seems possible for
>>> nfs4_find_file to race with the destruction of an fi_deleg_file while
>>> trying to take a reference to it.
>>>
>>> put_deleg_file is done while holding the fi_lock. Take and hold it
>>> when dealing with the fi_deleg_file in nfs4_find_file.
>>>
>>> Signed-off-by: Jeff Layton <[email protected]>
>>> ---
>>> fs/nfsd/nfs4state.c | 16 ++++++++++------
>>> 1 file changed, 10 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index b68238024e49..3df3ae84bd07 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>>> static struct nfsd_file *
>>> nfs4_find_file(struct nfs4_stid *s, int flags)
>>> {
>>> + struct nfsd_file *ret = NULL;
>>> +
>>> if (!s)
>>> return NULL;
>>>
>>> switch (s->sc_type) {
>>> case NFS4_DELEG_STID:
>>> - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
>>> - return NULL;
>>> - return nfsd_file_get(s->sc_file->fi_deleg_file);
>>> + spin_lock(&s->sc_file->fi_lock);
>>> + if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
>>
>> You'd think this would be a really really hard race to hit.
>>
>> What I'm wondering, though, is whether the WARN_ON_ONCE should
>> be dropped by this patch. I've never seen it fire.
>>
>>
>
> I have:
>
> https://bugzilla.redhat.com/show_bug.cgi?id=1997177
> It's possible though that those WARNs are fallout from other bugs in the
> delegation handling, but it's hard to know for sure.
Before 2015 there were a bunch of BUG_ON's in this code that
were converted to WARN after Linus complained. Before that,
I think these were all debugging sentinels. (in which case
I would argue they might be better recast as tracepoints,
but that's for another day).
> I think we ought to keep it there for now.
The question is whether the WARN_ON is adding value for customers.
Can they do something about it? If they give us this information,
can we do something about it?
I can't tell from the warning whether the problem is due to a
server bug or valid client behavior. Both the server and the
client workload appear to survive.
So, I just don't feel like it's adding value, and firing a WARN
while holding a spinlock makes me squidgy.
>>> + ret = nfsd_file_get(s->sc_file->fi_deleg_file);
>>> + spin_unlock(&s->sc_file->fi_lock);
>>> + break;
>>> case NFS4_OPEN_STID:
>>> case NFS4_LOCK_STID:
>>> if (flags & RD_STATE)
>>> - return find_readable_file(s->sc_file);
>>> + ret = find_readable_file(s->sc_file);
>>> else
>>> - return find_writeable_file(s->sc_file);
>>> + ret = find_writeable_file(s->sc_file);
>>> }
>>>
>>> - return NULL;
>>> + return ret;
>>> }
>>>
>>> static __be32
>>> --
>>> 2.39.0
>>>
>>
>> --
>> Chuck Lever
>>
>>
>>
>
> --
> Jeff Layton <[email protected]>
--
Chuck Lever
On Thu, 05 Jan 2023, Jeff Layton wrote:
> Even though there is a WARN_ON_ONCE check, it seems possible for
> nfs4_find_file to race with the destruction of an fi_deleg_file while
> trying to take a reference to it.
>
> put_deleg_file is done while holding the fi_lock. Take and hold it
> when dealing with the fi_deleg_file in nfs4_find_file.
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 16 ++++++++++------
> 1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index b68238024e49..3df3ae84bd07 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> static struct nfsd_file *
> nfs4_find_file(struct nfs4_stid *s, int flags)
> {
> + struct nfsd_file *ret = NULL;
> +
> if (!s)
> return NULL;
>
> switch (s->sc_type) {
> case NFS4_DELEG_STID:
> - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> - return NULL;
> - return nfsd_file_get(s->sc_file->fi_deleg_file);
> + spin_lock(&s->sc_file->fi_lock);
> + if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> + ret = nfsd_file_get(s->sc_file->fi_deleg_file);
> + spin_unlock(&s->sc_file->fi_lock);
> + break;
As an nfsd_file is freed with rcu, we don't need the spinlock.
rcu_read_lock()
ret = rcu_dereference(s->sc_file->fi_deleg_file);
if (ret)
ret = nfsd_file_get(ret);
rcu_read_unlock();
You could even put the NULL test in nfsd_file_get() and have:
rcu_read_lock()l;
ret = nfsd_file_get(rcu_dereference(s->sc_file->fi_deleg_file));
rcu_read_unlock();
but that might not be a win.
I agree with Chuck that the WARNing isn't helpful.
NeilBrown
> case NFS4_OPEN_STID:
> case NFS4_LOCK_STID:
> if (flags & RD_STATE)
> - return find_readable_file(s->sc_file);
> + ret = find_readable_file(s->sc_file);
> else
> - return find_writeable_file(s->sc_file);
> + ret = find_writeable_file(s->sc_file);
> }
>
> - return NULL;
> + return ret;
> }
>
> static __be32
> --
> 2.39.0
>
>
On Fri, 2023-01-06 at 10:05 +1100, NeilBrown wrote:
> On Thu, 05 Jan 2023, Jeff Layton wrote:
> > Even though there is a WARN_ON_ONCE check, it seems possible for
> > nfs4_find_file to race with the destruction of an fi_deleg_file while
> > trying to take a reference to it.
> >
> > put_deleg_file is done while holding the fi_lock. Take and hold it
> > when dealing with the fi_deleg_file in nfs4_find_file.
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> > fs/nfsd/nfs4state.c | 16 ++++++++++------
> > 1 file changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index b68238024e49..3df3ae84bd07 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > static struct nfsd_file *
> > nfs4_find_file(struct nfs4_stid *s, int flags)
> > {
> > + struct nfsd_file *ret = NULL;
> > +
> > if (!s)
> > return NULL;
> >
> > switch (s->sc_type) {
> > case NFS4_DELEG_STID:
> > - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> > - return NULL;
> > - return nfsd_file_get(s->sc_file->fi_deleg_file);
> > + spin_lock(&s->sc_file->fi_lock);
> > + if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> > + ret = nfsd_file_get(s->sc_file->fi_deleg_file);
> > + spin_unlock(&s->sc_file->fi_lock);
> > + break;
>
> As an nfsd_file is freed with rcu, we don't need the spinlock.
>
> rcu_read_lock()
> ret = rcu_dereference(s->sc_file->fi_deleg_file);
> if (ret)
> ret = nfsd_file_get(ret);
> rcu_read_unlock();
>
> You could even put the NULL test in nfsd_file_get() and have:
>
> rcu_read_lock()l;
> ret = nfsd_file_get(rcu_dereference(s->sc_file->fi_deleg_file));
> rcu_read_unlock();
>
> but that might not be a win.
>
> I agree with Chuck that the WARNing isn't helpful.
>
>
Good point. I'll send a v2 set in a bit.
>
> > case NFS4_OPEN_STID:
> > case NFS4_LOCK_STID:
> > if (flags & RD_STATE)
> > - return find_readable_file(s->sc_file);
> > + ret = find_readable_file(s->sc_file);
> > else
> > - return find_writeable_file(s->sc_file);
> > + ret = find_writeable_file(s->sc_file);
> > }
> >
> > - return NULL;
> > + return ret;
> > }
> >
> > static __be32
> > --
> > 2.39.0
> >
> >
--
Jeff Layton <[email protected]>
On Fri, 2023-01-06 at 10:05 +1100, NeilBrown wrote:
> On Thu, 05 Jan 2023, Jeff Layton wrote:
> > Even though there is a WARN_ON_ONCE check, it seems possible for
> > nfs4_find_file to race with the destruction of an fi_deleg_file while
> > trying to take a reference to it.
> >
> > put_deleg_file is done while holding the fi_lock. Take and hold it
> > when dealing with the fi_deleg_file in nfs4_find_file.
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> > fs/nfsd/nfs4state.c | 16 ++++++++++------
> > 1 file changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index b68238024e49..3df3ae84bd07 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > static struct nfsd_file *
> > nfs4_find_file(struct nfs4_stid *s, int flags)
> > {
> > + struct nfsd_file *ret = NULL;
> > +
> > if (!s)
> > return NULL;
> >
> > switch (s->sc_type) {
> > case NFS4_DELEG_STID:
> > - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> > - return NULL;
> > - return nfsd_file_get(s->sc_file->fi_deleg_file);
> > + spin_lock(&s->sc_file->fi_lock);
> > + if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> > + ret = nfsd_file_get(s->sc_file->fi_deleg_file);
> > + spin_unlock(&s->sc_file->fi_lock);
> > + break;
>
> As an nfsd_file is freed with rcu, we don't need the spinlock.
>
> rcu_read_lock()
> ret = rcu_dereference(s->sc_file->fi_deleg_file);
> if (ret)
> ret = nfsd_file_get(ret);
> rcu_read_unlock();
>
> You could even put the NULL test in nfsd_file_get() and have:
>
> rcu_read_lock()l;
> ret = nfsd_file_get(rcu_dereference(s->sc_file->fi_deleg_file));
> rcu_read_unlock();
>
> but that might not be a win.
>
> I agree with Chuck that the WARNing isn't helpful.
>
> NeilBrown
>
Ok, I took a look at this.
To do it right, we'd need to annotate the fi_deleg_file field with
__rcu. That means we'd need to clean up a bunch of existing
fi_deleg_file accesses to properly use rcu_dereference_protected.
This is probably worthwhile stuff to do, but it's a larger patch series
and will touch a bunch of unrelated delegation handling. At this point,
I think I'd rather just keep the spinlocking here since that should be
safe. Cleaning up delegation handling is a longer-term project that I'd
rather table for now.
I will remove the WARN_ON_ONCE though, and I think allowing
nfsd_file_get to accept a NULL pointer is probably a good thing too.
I'll resend a new series in a bit.
>
> > case NFS4_OPEN_STID:
> > case NFS4_LOCK_STID:
> > if (flags & RD_STATE)
> > - return find_readable_file(s->sc_file);
> > + ret = find_readable_file(s->sc_file);
> > else
> > - return find_writeable_file(s->sc_file);
> > + ret = find_writeable_file(s->sc_file);
> > }
> >
> > - return NULL;
> > + return ret;
> > }
> >
> > static __be32
> > --
> > 2.39.0
> >
> >
--
Jeff Layton <[email protected]>
On Fri, 06 Jan 2023, Jeff Layton wrote:
> On Fri, 2023-01-06 at 10:05 +1100, NeilBrown wrote:
> > On Thu, 05 Jan 2023, Jeff Layton wrote:
> > > Even though there is a WARN_ON_ONCE check, it seems possible for
> > > nfs4_find_file to race with the destruction of an fi_deleg_file while
> > > trying to take a reference to it.
> > >
> > > put_deleg_file is done while holding the fi_lock. Take and hold it
> > > when dealing with the fi_deleg_file in nfs4_find_file.
> > >
> > > Signed-off-by: Jeff Layton <[email protected]>
> > > ---
> > > fs/nfsd/nfs4state.c | 16 ++++++++++------
> > > 1 file changed, 10 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index b68238024e49..3df3ae84bd07 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -6417,23 +6417,27 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > static struct nfsd_file *
> > > nfs4_find_file(struct nfs4_stid *s, int flags)
> > > {
> > > + struct nfsd_file *ret = NULL;
> > > +
> > > if (!s)
> > > return NULL;
> > >
> > > switch (s->sc_type) {
> > > case NFS4_DELEG_STID:
> > > - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> > > - return NULL;
> > > - return nfsd_file_get(s->sc_file->fi_deleg_file);
> > > + spin_lock(&s->sc_file->fi_lock);
> > > + if (!WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
> > > + ret = nfsd_file_get(s->sc_file->fi_deleg_file);
> > > + spin_unlock(&s->sc_file->fi_lock);
> > > + break;
> >
> > As an nfsd_file is freed with rcu, we don't need the spinlock.
> >
> > rcu_read_lock()
> > ret = rcu_dereference(s->sc_file->fi_deleg_file);
> > if (ret)
> > ret = nfsd_file_get(ret);
> > rcu_read_unlock();
> >
> > You could even put the NULL test in nfsd_file_get() and have:
> >
> > rcu_read_lock()l;
> > ret = nfsd_file_get(rcu_dereference(s->sc_file->fi_deleg_file));
> > rcu_read_unlock();
> >
> > but that might not be a win.
> >
> > I agree with Chuck that the WARNing isn't helpful.
> >
> > NeilBrown
> >
>
> Ok, I took a look at this.
>
> To do it right, we'd need to annotate the fi_deleg_file field with
> __rcu. That means we'd need to clean up a bunch of existing
> fi_deleg_file accesses to properly use rcu_dereference_protected.
>
> This is probably worthwhile stuff to do, but it's a larger patch series
> and will touch a bunch of unrelated delegation handling. At this point,
> I think I'd rather just keep the spinlocking here since that should be
> safe. Cleaning up delegation handling is a longer-term project that I'd
> rather table for now.
That all seems very sensible - thank for looking into it.
NeilBrown
>
> I will remove the WARN_ON_ONCE though, and I think allowing
> nfsd_file_get to accept a NULL pointer is probably a good thing too.
> I'll resend a new series in a bit.
>
> >
> > > case NFS4_OPEN_STID:
> > > case NFS4_LOCK_STID:
> > > if (flags & RD_STATE)
> > > - return find_readable_file(s->sc_file);
> > > + ret = find_readable_file(s->sc_file);
> > > else
> > > - return find_writeable_file(s->sc_file);
> > > + ret = find_writeable_file(s->sc_file);
> > > }
> > >
> > > - return NULL;
> > > + return ret;
> > > }
> > >
> > > static __be32
> > > --
> > > 2.39.0
> > >
> > >
>
> --
> Jeff Layton <[email protected]>
>