2017-10-17 13:55:07

by Benjamin Coddington

[permalink] [raw]
Subject: [PATCH] nfsd4: Prevent the reuse of a closed stateid

While running generic/089 on NFSv4.1, the following on-the-wire exchange
occurs:

Client Server
---------- ----------
OPEN (owner A) ->
<- OPEN response: state A1
CLOSE (state A1)->
OPEN (owner A) ->
<- CLOSE response: state A2
<- OPEN response: state A3
LOCK (state A3) ->
<- LOCK response: NFS4_BAD_STATEID

The server should not be returning state A3 in response to the second OPEN
after processing the CLOSE and sending back state A2. The problem is that
nfsd4_process_open2() is able to find an existing open state just after
nfsd4_close() has incremented the state's sequence number but before
calling unhash_ol_stateid().

Fix this by using the state's sc_type field to identify closed state, and
protect the update of that field with st_mutex. nfsd4_find_existing_open()
will return with the st_mutex held, so that the state will not transition
between being looked up and then updated in nfsd4_process_open2().

Signed-off-by: Benjamin Coddington <[email protected]>
---
fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0c04f81aa63b..17473a092d01 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3503,11 +3503,12 @@ static const struct nfs4_stateowner_operations openowner_ops = {
static struct nfs4_ol_stateid *
nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
{
- struct nfs4_ol_stateid *local, *ret = NULL;
+ struct nfs4_ol_stateid *local, *ret;
struct nfs4_openowner *oo = open->op_openowner;

- lockdep_assert_held(&fp->fi_lock);
-
+retry:
+ ret = NULL;
+ spin_lock(&fp->fi_lock);
list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
@@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
break;
}
}
+ spin_unlock(&fp->fi_lock);
+
+ /* Did we race with CLOSE? */
+ if (ret) {
+ mutex_lock(&ret->st_mutex);
+ if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
+ mutex_unlock(&ret->st_mutex);
+ nfs4_put_stid(&ret->st_stid);
+ goto retry;
+ }
+ }
+
return ret;
}

@@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
mutex_lock(&stp->st_mutex);

spin_lock(&oo->oo_owner.so_client->cl_lock);
- spin_lock(&fp->fi_lock);

retstp = nfsd4_find_existing_open(fp, open);
if (retstp)
@@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
+ spin_lock(&fp->fi_lock);
list_add(&stp->st_perfile, &fp->fi_stateids);
+ spin_unlock(&fp->fi_lock);

out_unlock:
- spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
if (retstp) {
- mutex_lock(&retstp->st_mutex);
/* To keep mutex tracking happy */
mutex_unlock(&stp->st_mutex);
stp = retstp;
@@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
- spin_lock(&fp->fi_lock);
stp = nfsd4_find_existing_open(fp, open);
- spin_unlock(&fp->fi_lock);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid;
@@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
- mutex_lock(&stp->st_mutex);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
mutex_unlock(&stp->st_mutex);
@@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
bool unhashed;
LIST_HEAD(reaplist);

- s->st_stid.sc_type = NFS4_CLOSED_STID;
spin_lock(&clp->cl_lock);
unhashed = unhash_open_stateid(s, &reaplist);

@@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
+ stp->st_stid.sc_type = NFS4_CLOSED_STID;
mutex_unlock(&stp->st_mutex);

nfsd4_close_open_stateid(stp);
--
2.9.3



2017-10-17 16:39:16

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid

On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
> While running generic/089 on NFSv4.1, the following on-the-wire exchange
> occurs:
>
> Client Server
> ---------- ----------
> OPEN (owner A) ->
> <- OPEN response: state A1
> CLOSE (state A1)->
> OPEN (owner A) ->
> <- CLOSE response: state A2
> <- OPEN response: state A3
> LOCK (state A3) ->
> <- LOCK response: NFS4_BAD_STATEID
>
> The server should not be returning state A3 in response to the second OPEN
> after processing the CLOSE and sending back state A2. The problem is that
> nfsd4_process_open2() is able to find an existing open state just after
> nfsd4_close() has incremented the state's sequence number but before
> calling unhash_ol_stateid().
>
> Fix this by using the state's sc_type field to identify closed state, and
> protect the update of that field with st_mutex. nfsd4_find_existing_open()
> will return with the st_mutex held, so that the state will not transition
> between being looked up and then updated in nfsd4_process_open2().
>
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>

The problem is real, but I'm not thrilled with the fix. It seems more
lock thrashy...

Could we instead fix this by just unhashing the stateid earlier, before
the nfs4_inc_and_copy_stateid call?

> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0c04f81aa63b..17473a092d01 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3503,11 +3503,12 @@ static const struct nfs4_stateowner_operations openowner_ops = {
> static struct nfs4_ol_stateid *
> nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> {
> - struct nfs4_ol_stateid *local, *ret = NULL;
> + struct nfs4_ol_stateid *local, *ret;
> struct nfs4_openowner *oo = open->op_openowner;
>
> - lockdep_assert_held(&fp->fi_lock);
> -
> +retry:
> + ret = NULL;
> + spin_lock(&fp->fi_lock);
> list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
> /* ignore lock owners */
> if (local->st_stateowner->so_is_open_owner == 0)
> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> break;
> }
> }
> + spin_unlock(&fp->fi_lock);
> +
> + /* Did we race with CLOSE? */
> + if (ret) {
> + mutex_lock(&ret->st_mutex);
> + if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
> + mutex_unlock(&ret->st_mutex);
> + nfs4_put_stid(&ret->st_stid);
> + goto retry;
> + }
> + }
> +
> return ret;
> }
>
> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
> mutex_lock(&stp->st_mutex);
>
> spin_lock(&oo->oo_owner.so_client->cl_lock);
> - spin_lock(&fp->fi_lock);
>
> retstp = nfsd4_find_existing_open(fp, open);
> if (retstp)
> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
> stp->st_deny_bmap = 0;
> stp->st_openstp = NULL;
> list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
> + spin_lock(&fp->fi_lock);
> list_add(&stp->st_perfile, &fp->fi_stateids);
> + spin_unlock(&fp->fi_lock);
>
> out_unlock:
> - spin_unlock(&fp->fi_lock);
> spin_unlock(&oo->oo_owner.so_client->cl_lock);
> if (retstp) {
> - mutex_lock(&retstp->st_mutex);
> /* To keep mutex tracking happy */
> mutex_unlock(&stp->st_mutex);
> stp = retstp;
> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> status = nfs4_check_deleg(cl, open, &dp);
> if (status)
> goto out;
> - spin_lock(&fp->fi_lock);
> stp = nfsd4_find_existing_open(fp, open);
> - spin_unlock(&fp->fi_lock);
> } else {
> open->op_file = NULL;
> status = nfserr_bad_stateid;
> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> */
> if (stp) {
> /* Stateid was found, this is an OPEN upgrade */
> - mutex_lock(&stp->st_mutex);
> status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
> if (status) {
> mutex_unlock(&stp->st_mutex);
> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
> bool unhashed;
> LIST_HEAD(reaplist);
>
> - s->st_stid.sc_type = NFS4_CLOSED_STID;
> spin_lock(&clp->cl_lock);
> unhashed = unhash_open_stateid(s, &reaplist);
>
> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> if (status)
> goto out;
> nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> + stp->st_stid.sc_type = NFS4_CLOSED_STID;
> mutex_unlock(&stp->st_mutex);
>
> nfsd4_close_open_stateid(stp);

--
Jeff Layton <[email protected]>

2017-10-17 17:46:17

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid

On 17 Oct 2017, at 12:39, Jeff Layton wrote:

> On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
>> While running generic/089 on NFSv4.1, the following on-the-wire
>> exchange
>> occurs:
>>
>> Client Server
>> ---------- ----------
>> OPEN (owner A) ->
>> <- OPEN response: state A1
>> CLOSE (state A1)->
>> OPEN (owner A) ->
>> <- CLOSE response: state A2
>> <- OPEN response: state A3
>> LOCK (state A3) ->
>> <- LOCK response: NFS4_BAD_STATEID
>>
>> The server should not be returning state A3 in response to the second
>> OPEN
>> after processing the CLOSE and sending back state A2. The problem is
>> that
>> nfsd4_process_open2() is able to find an existing open state just
>> after
>> nfsd4_close() has incremented the state's sequence number but before
>> calling unhash_ol_stateid().
>>
>> Fix this by using the state's sc_type field to identify closed state,
>> and
>> protect the update of that field with st_mutex.
>> nfsd4_find_existing_open()
>> will return with the st_mutex held, so that the state will not
>> transition
>> between being looked up and then updated in nfsd4_process_open2().
>>
>> Signed-off-by: Benjamin Coddington <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>> 1 file changed, 19 insertions(+), 10 deletions(-)
>>
>
> The problem is real, but I'm not thrilled with the fix. It seems more
> lock thrashy...

Really? I don't think I increased any call counts to spinlocks or mutex
locks. The locking overhead should be equivalent.

> Could we instead fix this by just unhashing the stateid earlier,
> before
> the nfs4_inc_and_copy_stateid call?

I think I tried this - and if I remember correctly, the problem is that
you
can't hold st_mutex while taking the cl_lock (or maybe it was the
fi_lock?).
I tried several simpler ways, and they resulted in deadlocks.

That was a couple of weeks ago, so I apologize if my memory is fuzzy. I
should have sent this patch off to the list then. If you need me to
verify that, I can - but maybe someone more familiar with the locking
here
can save me that time.

Ben

2017-10-17 18:19:28

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid

On Tue, 2017-10-17 at 13:46 -0400, Benjamin Coddington wrote:
> On 17 Oct 2017, at 12:39, Jeff Layton wrote:
>
> > On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
> > > While running generic/089 on NFSv4.1, the following on-the-wire
> > > exchange
> > > occurs:
> > >
> > > Client Server
> > > ---------- ----------
> > > OPEN (owner A) ->
> > > <- OPEN response: state A1
> > > CLOSE (state A1)->
> > > OPEN (owner A) ->
> > > <- CLOSE response: state A2
> > > <- OPEN response: state A3
> > > LOCK (state A3) ->
> > > <- LOCK response: NFS4_BAD_STATEID
> > >
> > > The server should not be returning state A3 in response to the second
> > > OPEN
> > > after processing the CLOSE and sending back state A2. The problem is
> > > that
> > > nfsd4_process_open2() is able to find an existing open state just
> > > after
> > > nfsd4_close() has incremented the state's sequence number but before
> > > calling unhash_ol_stateid().
> > >
> > > Fix this by using the state's sc_type field to identify closed state,
> > > and
> > > protect the update of that field with st_mutex.
> > > nfsd4_find_existing_open()
> > > will return with the st_mutex held, so that the state will not
> > > transition
> > > between being looked up and then updated in nfsd4_process_open2().
> > >
> > > Signed-off-by: Benjamin Coddington <[email protected]>
> > > ---
> > > fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
> > > 1 file changed, 19 insertions(+), 10 deletions(-)
> > >
> >
> > The problem is real, but I'm not thrilled with the fix. It seems more
> > lock thrashy...
>
> Really? I don't think I increased any call counts to spinlocks or mutex
> locks. The locking overhead should be equivalent.
>

init_open_stateid will now end up taking fi_lock twice. Also we now have
to take the st_mutex in nfsd4_find_existing_open, just to check sc_type.
Neither of those are probably unreasonable, it's just messier than I'd
like.

> > Could we instead fix this by just unhashing the stateid earlier,
> > before
> > the nfs4_inc_and_copy_stateid call?
>
> I think I tried this - and if I remember correctly, the problem is that
> you
> can't hold st_mutex while taking the cl_lock (or maybe it was the
> fi_lock?).
> I tried several simpler ways, and they resulted in deadlocks.
>
> That was a couple of weeks ago, so I apologize if my memory is fuzzy. I
> should have sent this patch off to the list then. If you need me to
> verify that, I can - but maybe someone more familiar with the locking
> here
> can save me that time.
>
> Ben


There should be no problem taking the cl_lock while holding st_mutex.
It's never safe to do the reverse though. I'd just do the unhash before
nfs4_inc_and_copy_stateid, and then save the rest of the stuff in
nfsd4_close_open_stateid for after the st_mutex has been dropped.

I think what I'm suggesting is possible. You may need to unroll
nfsd4_close_open_stateid to make it work though.

--
Jeff Layton <[email protected]>

2017-10-17 19:11:26

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid

On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
> While running generic/089 on NFSv4.1, the following on-the-wire exchange
> occurs:
>
> Client Server
> ---------- ----------
> OPEN (owner A) ->
> <- OPEN response: state A1
> CLOSE (state A1)->
> OPEN (owner A) ->
> <- CLOSE response: state A2
> <- OPEN response: state A3
> LOCK (state A3) ->
> <- LOCK response: NFS4_BAD_STATEID
>
> The server should not be returning state A3 in response to the second OPEN
> after processing the CLOSE and sending back state A2. The problem is that
> nfsd4_process_open2() is able to find an existing open state just after
> nfsd4_close() has incremented the state's sequence number but before
> calling unhash_ol_stateid().
>
> Fix this by using the state's sc_type field to identify closed state, and
> protect the update of that field with st_mutex. nfsd4_find_existing_open()
> will return with the st_mutex held, so that the state will not transition
> between being looked up and then updated in nfsd4_process_open2().
>
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0c04f81aa63b..17473a092d01 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3503,11 +3503,12 @@ static const struct nfs4_stateowner_operations openowner_ops = {
> static struct nfs4_ol_stateid *
> nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> {
> - struct nfs4_ol_stateid *local, *ret = NULL;
> + struct nfs4_ol_stateid *local, *ret;
> struct nfs4_openowner *oo = open->op_openowner;
>
> - lockdep_assert_held(&fp->fi_lock);
> -
> +retry:
> + ret = NULL;
> + spin_lock(&fp->fi_lock);
> list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
> /* ignore lock owners */
> if (local->st_stateowner->so_is_open_owner == 0)
> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> break;
> }
> }
> + spin_unlock(&fp->fi_lock);
> +
> + /* Did we race with CLOSE? */
> + if (ret) {
> + mutex_lock(&ret->st_mutex);
> + if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
> + mutex_unlock(&ret->st_mutex);
> + nfs4_put_stid(&ret->st_stid);
> + goto retry;
> + }
> + }
> +
> return ret;
> }
>
> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
> mutex_lock(&stp->st_mutex);
>
> spin_lock(&oo->oo_owner.so_client->cl_lock);
> - spin_lock(&fp->fi_lock);
>
> retstp = nfsd4_find_existing_open(fp, open);
> if (retstp)
> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
> stp->st_deny_bmap = 0;
> stp->st_openstp = NULL;
> list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
> + spin_lock(&fp->fi_lock);
> list_add(&stp->st_perfile, &fp->fi_stateids);
> + spin_unlock(&fp->fi_lock);
>

Another potential problem above. I don't think it'd be possible with
v4.0, but I think it could happen with v4.1+:

Could we end up with racing opens, such that two different nfsds search
for an existing open and not find one? Then, they both end up here and
insert two different stateids for the same openowner+file.

That's prevented now because we do it all under the same fi_lock, but
that won't be the case here.

> out_unlock:
> - spin_unlock(&fp->fi_lock);
> spin_unlock(&oo->oo_owner.so_client->cl_lock);
> if (retstp) {
> - mutex_lock(&retstp->st_mutex);
> /* To keep mutex tracking happy */
> mutex_unlock(&stp->st_mutex);
> stp = retstp;
> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> status = nfs4_check_deleg(cl, open, &dp);
> if (status)
> goto out;
> - spin_lock(&fp->fi_lock);
> stp = nfsd4_find_existing_open(fp, open);
> - spin_unlock(&fp->fi_lock);
> } else {
> open->op_file = NULL;
> status = nfserr_bad_stateid;
> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> */
> if (stp) {
> /* Stateid was found, this is an OPEN upgrade */
> - mutex_lock(&stp->st_mutex);
> status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
> if (status) {
> mutex_unlock(&stp->st_mutex);
> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
> bool unhashed;
> LIST_HEAD(reaplist);
>
> - s->st_stid.sc_type = NFS4_CLOSED_STID;
> spin_lock(&clp->cl_lock);
> unhashed = unhash_open_stateid(s, &reaplist);
>
> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> if (status)
> goto out;
> nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> + stp->st_stid.sc_type = NFS4_CLOSED_STID;
> mutex_unlock(&stp->st_mutex);
>
> nfsd4_close_open_stateid(stp);

--
Jeff Layton <[email protected]>

2017-10-17 20:40:08

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid



On 17 Oct 2017, at 14:19, Jeff Layton wrote:

> On Tue, 2017-10-17 at 13:46 -0400, Benjamin Coddington wrote:
>> On 17 Oct 2017, at 12:39, Jeff Layton wrote:
>>
>>> On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
>>>> While running generic/089 on NFSv4.1, the following on-the-wire
>>>> exchange
>>>> occurs:
>>>>
>>>> Client Server
>>>> ---------- ----------
>>>> OPEN (owner A) ->
>>>> <- OPEN response: state A1
>>>> CLOSE (state A1)->
>>>> OPEN (owner A) ->
>>>> <- CLOSE response: state A2
>>>> <- OPEN response: state A3
>>>> LOCK (state A3) ->
>>>> <- LOCK response: NFS4_BAD_STATEID
>>>>
>>>> The server should not be returning state A3 in response to the second
>>>> OPEN
>>>> after processing the CLOSE and sending back state A2. The problem is
>>>> that
>>>> nfsd4_process_open2() is able to find an existing open state just
>>>> after
>>>> nfsd4_close() has incremented the state's sequence number but before
>>>> calling unhash_ol_stateid().
>>>>
>>>> Fix this by using the state's sc_type field to identify closed state,
>>>> and
>>>> protect the update of that field with st_mutex.
>>>> nfsd4_find_existing_open()
>>>> will return with the st_mutex held, so that the state will not
>>>> transition
>>>> between being looked up and then updated in nfsd4_process_open2().
>>>>
>>>> Signed-off-by: Benjamin Coddington <[email protected]>
>>>> ---
>>>> fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>>>> 1 file changed, 19 insertions(+), 10 deletions(-)
>>>>
>>>
>>> The problem is real, but I'm not thrilled with the fix. It seems more
>>> lock thrashy...
>>
>> Really? I don't think I increased any call counts to spinlocks or mutex
>> locks. The locking overhead should be equivalent.
>>
>
> init_open_stateid will now end up taking fi_lock twice.

Yes, that's true unless there's an existing state.

> Also we now have to take the st_mutex in nfsd4_find_existing_open, just to
> check sc_type. Neither of those are probably unreasonable, it's just
> messier than I'd like.

It is indeed messy.. no argument. I'll spin up your suggestion to unhash
the stateid before updating and take it for a ride and let you know the
results. Thanks for looking at this.

Ben

2017-10-17 20:44:29

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid



On 17 Oct 2017, at 15:11, Jeff Layton wrote:

> On Tue, 2017-10-17 at 09:55 -0400, Benjamin Coddington wrote:
>> While running generic/089 on NFSv4.1, the following on-the-wire
>> exchange
>> occurs:
>>
>> Client Server
>> ---------- ----------
>> OPEN (owner A) ->
>> <- OPEN response: state A1
>> CLOSE (state A1)->
>> OPEN (owner A) ->
>> <- CLOSE response: state A2
>> <- OPEN response: state A3
>> LOCK (state A3) ->
>> <- LOCK response: NFS4_BAD_STATEID
>>
>> The server should not be returning state A3 in response to the second
>> OPEN
>> after processing the CLOSE and sending back state A2. The problem is
>> that
>> nfsd4_process_open2() is able to find an existing open state just
>> after
>> nfsd4_close() has incremented the state's sequence number but before
>> calling unhash_ol_stateid().
>>
>> Fix this by using the state's sc_type field to identify closed state,
>> and
>> protect the update of that field with st_mutex.
>> nfsd4_find_existing_open()
>> will return with the st_mutex held, so that the state will not
>> transition
>> between being looked up and then updated in nfsd4_process_open2().
>>
>> Signed-off-by: Benjamin Coddington <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>> 1 file changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0c04f81aa63b..17473a092d01 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -3503,11 +3503,12 @@ static const struct
>> nfs4_stateowner_operations openowner_ops = {
>> static struct nfs4_ol_stateid *
>> nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open
>> *open)
>> {
>> - struct nfs4_ol_stateid *local, *ret = NULL;
>> + struct nfs4_ol_stateid *local, *ret;
>> struct nfs4_openowner *oo = open->op_openowner;
>>
>> - lockdep_assert_held(&fp->fi_lock);
>> -
>> +retry:
>> + ret = NULL;
>> + spin_lock(&fp->fi_lock);
>> list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
>> /* ignore lock owners */
>> if (local->st_stateowner->so_is_open_owner == 0)
>> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp,
>> struct nfsd4_open *open)
>> break;
>> }
>> }
>> + spin_unlock(&fp->fi_lock);
>> +
>> + /* Did we race with CLOSE? */
>> + if (ret) {
>> + mutex_lock(&ret->st_mutex);
>> + if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
>> + mutex_unlock(&ret->st_mutex);
>> + nfs4_put_stid(&ret->st_stid);
>> + goto retry;
>> + }
>> + }
>> +
>> return ret;
>> }
>>
>> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct
>> nfsd4_open *open)
>> mutex_lock(&stp->st_mutex);
>>
>> spin_lock(&oo->oo_owner.so_client->cl_lock);
>> - spin_lock(&fp->fi_lock);
>>
>> retstp = nfsd4_find_existing_open(fp, open);
>> if (retstp)
>> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp,
>> struct nfsd4_open *open)
>> stp->st_deny_bmap = 0;
>> stp->st_openstp = NULL;
>> list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
>> + spin_lock(&fp->fi_lock);
>> list_add(&stp->st_perfile, &fp->fi_stateids);
>> + spin_unlock(&fp->fi_lock);
>>
>
> Another potential problem above. I don't think it's be possible with
> v4.0, but I think it could happen with v4.1+:
>
> Could we end up with racing opens, such that two different nfsds
> search
> for an existing open and not find one? Then, they both end up here and
> insert two different stateids for the same openowner+file.
>
> That's prevented now because we do it all under the same fi_lock, but
> that won't be the case here.

Yes, that's definitely a problem, and it's reminded me why I kept
dropping
fi_lock - you can't take the st_mutex while holding it.. This is a
tangly
bit of locking in here.. I'll see what I can come up with.

Ben

2017-10-19 00:48:36

by Andrew W Elble

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid


Benjamin Coddington <[email protected]> writes:

> On 17 Oct 2017, at 14:19, Jeff Layton wrote:
>
>> Also we now have to take the st_mutex in nfsd4_find_existing_open, just to
>> check sc_type. Neither of those are probably unreasonable, it's just
>> messier than I'd like.
>
> It is indeed messy.. no argument. I'll spin up your suggestion to unhash
> the stateid before updating and take it for a ride and let you know the
> results. Thanks for looking at this.

I threw this against a quick lockdep run and didn't see anything that
surprised me. I think we developed a harmless warning in
nfsd4_process_open2() a ways back?

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 94ef63a10146..87535f2688be 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -64,6 +64,9 @@
static const stateid_t currentstateid = {
.si_generation = 1,
};
+static const stateid_t invalidstateid = {
+ .si_generation = U32_MAX,
+};

static u64 current_sessionid = 1;

@@ -5362,11 +5365,11 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
- nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- mutex_unlock(&stp->st_mutex);
+ memcpy(&close->cl_stateid, &invalidstateid, sizeof(stateid_t));

nfsd4_close_open_stateid(stp);

+ mutex_unlock(&stp->st_mutex);
/* put reference from nfs4_preprocess_seqid_op */
nfs4_put_stid(&stp->st_stid);
out:
--
1.8.3.1

--
Andrew W. Elble
[email protected]
Infrastructure Engineer, Communications Technical Lead
Rochester Institute of Technology
PGP: BFAD 8461 4CCF DC95 DA2C B0EB 965B 082E 863E C912

2017-10-19 12:06:23

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid

On 18 Oct 2017, at 20:48, Andrew W Elble wrote:

> Benjamin Coddington <[email protected]> writes:
>
>> On 17 Oct 2017, at 14:19, Jeff Layton wrote:
>>
>>> Also we now have to take the st_mutex in nfsd4_find_existing_open,
>>> just to
>>> check sc_type. Neither of those are probably unreasonable, it's
>>> just
>>> messier than I'd like.
>>
>> It is indeed messy.. no argument. I'll spin up your suggestion to
>> unhash
>> the stateid before updating and take it for a ride and let you know
>> the
>> results. Thanks for looking at this.
>
> I threw this against a quick lockdep run and didn't see anything that
> surprised me. I think we developed a harmless warning in
> nfsd4_process_open2() a ways back?
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 94ef63a10146..87535f2688be 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -64,6 +64,9 @@
> static const stateid_t currentstateid = {
> .si_generation = 1,
> };
> +static const stateid_t invalidstateid = {
> + .si_generation = U32_MAX,
> +};
>
> static u64 current_sessionid = 1;
>
> @@ -5362,11 +5365,11 @@ static void nfsd4_close_open_stateid(struct
> nfs4_ol_stateid *s)
> nfsd4_bump_seqid(cstate, status);
> if (status)
> goto out;
> - nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> - mutex_unlock(&stp->st_mutex);
> + memcpy(&close->cl_stateid, &invalidstateid, sizeof(stateid_t));
>
> nfsd4_close_open_stateid(stp);
>
> + mutex_unlock(&stp->st_mutex);
> /* put reference from nfs4_preprocess_seqid_op */
> nfs4_put_stid(&stp->st_stid);
> out:

I don't think this resolves the race. We'll still race in and find the
stateid in
nfsd4_process_open2()
nfsd4_find_existing_open()

So the client will see the response to the second OPEN after the CLOSE
as
having valid state with seqid of 2, and then the server will dispose of
that
state. The next operation will return with BAD_STATEID.

Ben

2017-11-10 22:03:18

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid

I'm assuming this has been addressed by Trond's series (in linux-next
now); but I haven't checked carefully....

--b.

On Tue, Oct 17, 2017 at 09:55:05AM -0400, Benjamin Coddington wrote:
> While running generic/089 on NFSv4.1, the following on-the-wire exchange
> occurs:
>
> Client Server
> ---------- ----------
> OPEN (owner A) ->
> <- OPEN response: state A1
> CLOSE (state A1)->
> OPEN (owner A) ->
> <- CLOSE response: state A2
> <- OPEN response: state A3
> LOCK (state A3) ->
> <- LOCK response: NFS4_BAD_STATEID
>
> The server should not be returning state A3 in response to the second OPEN
> after processing the CLOSE and sending back state A2. The problem is that
> nfsd4_process_open2() is able to find an existing open state just after
> nfsd4_close() has incremented the state's sequence number but before
> calling unhash_ol_stateid().
>
> Fix this by using the state's sc_type field to identify closed state, and
> protect the update of that field with st_mutex. nfsd4_find_existing_open()
> will return with the st_mutex held, so that the state will not transition
> between being looked up and then updated in nfsd4_process_open2().
>
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0c04f81aa63b..17473a092d01 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3503,11 +3503,12 @@ static const struct nfs4_stateowner_operations openowner_ops = {
> static struct nfs4_ol_stateid *
> nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> {
> - struct nfs4_ol_stateid *local, *ret = NULL;
> + struct nfs4_ol_stateid *local, *ret;
> struct nfs4_openowner *oo = open->op_openowner;
>
> - lockdep_assert_held(&fp->fi_lock);
> -
> +retry:
> + ret = NULL;
> + spin_lock(&fp->fi_lock);
> list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
> /* ignore lock owners */
> if (local->st_stateowner->so_is_open_owner == 0)
> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> break;
> }
> }
> + spin_unlock(&fp->fi_lock);
> +
> + /* Did we race with CLOSE? */
> + if (ret) {
> + mutex_lock(&ret->st_mutex);
> + if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
> + mutex_unlock(&ret->st_mutex);
> + nfs4_put_stid(&ret->st_stid);
> + goto retry;
> + }
> + }
> +
> return ret;
> }
>
> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
> mutex_lock(&stp->st_mutex);
>
> spin_lock(&oo->oo_owner.so_client->cl_lock);
> - spin_lock(&fp->fi_lock);
>
> retstp = nfsd4_find_existing_open(fp, open);
> if (retstp)
> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
> stp->st_deny_bmap = 0;
> stp->st_openstp = NULL;
> list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
> + spin_lock(&fp->fi_lock);
> list_add(&stp->st_perfile, &fp->fi_stateids);
> + spin_unlock(&fp->fi_lock);
>
> out_unlock:
> - spin_unlock(&fp->fi_lock);
> spin_unlock(&oo->oo_owner.so_client->cl_lock);
> if (retstp) {
> - mutex_lock(&retstp->st_mutex);
> /* To keep mutex tracking happy */
> mutex_unlock(&stp->st_mutex);
> stp = retstp;
> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> status = nfs4_check_deleg(cl, open, &dp);
> if (status)
> goto out;
> - spin_lock(&fp->fi_lock);
> stp = nfsd4_find_existing_open(fp, open);
> - spin_unlock(&fp->fi_lock);
> } else {
> open->op_file = NULL;
> status = nfserr_bad_stateid;
> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> */
> if (stp) {
> /* Stateid was found, this is an OPEN upgrade */
> - mutex_lock(&stp->st_mutex);
> status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
> if (status) {
> mutex_unlock(&stp->st_mutex);
> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
> bool unhashed;
> LIST_HEAD(reaplist);
>
> - s->st_stid.sc_type = NFS4_CLOSED_STID;
> spin_lock(&clp->cl_lock);
> unhashed = unhash_open_stateid(s, &reaplist);
>
> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> if (status)
> goto out;
> nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> + stp->st_stid.sc_type = NFS4_CLOSED_STID;
> mutex_unlock(&stp->st_mutex);
>
> nfsd4_close_open_stateid(stp);
> --
> 2.9.3

2017-11-13 13:22:53

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] nfsd4: Prevent the reuse of a closed stateid

Yes, I believe it has, though I haven't tested it yet, unfortunately.

On 10 Nov 2017, at 17:03, J. Bruce Fields wrote:

> I'm assuming this has been addressed by Trond's series (in linux-next
> now); but I haven't checked carefully....
>
> --b.
>
> On Tue, Oct 17, 2017 at 09:55:05AM -0400, Benjamin Coddington wrote:
>> While running generic/089 on NFSv4.1, the following on-the-wire
>> exchange
>> occurs:
>>
>> Client Server
>> ---------- ----------
>> OPEN (owner A) ->
>> <- OPEN response: state A1
>> CLOSE (state A1)->
>> OPEN (owner A) ->
>> <- CLOSE response: state A2
>> <- OPEN response: state A3
>> LOCK (state A3) ->
>> <- LOCK response: NFS4_BAD_STATEID
>>
>> The server should not be returning state A3 in response to the second
>> OPEN
>> after processing the CLOSE and sending back state A2. The problem is
>> that
>> nfsd4_process_open2() is able to find an existing open state just
>> after
>> nfsd4_close() has incremented the state's sequence number but before
>> calling unhash_ol_stateid().
>>
>> Fix this by using the state's sc_type field to identify closed state,
>> and
>> protect the update of that field with st_mutex.
>> nfsd4_find_existing_open()
>> will return with the st_mutex held, so that the state will not
>> transition
>> between being looked up and then updated in nfsd4_process_open2().
>>
>> Signed-off-by: Benjamin Coddington <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 29 +++++++++++++++++++----------
>> 1 file changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 0c04f81aa63b..17473a092d01 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -3503,11 +3503,12 @@ static const struct
>> nfs4_stateowner_operations openowner_ops = {
>> static struct nfs4_ol_stateid *
>> nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open
>> *open)
>> {
>> - struct nfs4_ol_stateid *local, *ret = NULL;
>> + struct nfs4_ol_stateid *local, *ret;
>> struct nfs4_openowner *oo = open->op_openowner;
>>
>> - lockdep_assert_held(&fp->fi_lock);
>> -
>> +retry:
>> + ret = NULL;
>> + spin_lock(&fp->fi_lock);
>> list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
>> /* ignore lock owners */
>> if (local->st_stateowner->so_is_open_owner == 0)
>> @@ -3518,6 +3519,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp,
>> struct nfsd4_open *open)
>> break;
>> }
>> }
>> + spin_unlock(&fp->fi_lock);
>> +
>> + /* Did we race with CLOSE? */
>> + if (ret) {
>> + mutex_lock(&ret->st_mutex);
>> + if (ret->st_stid.sc_type != NFS4_OPEN_STID) {
>> + mutex_unlock(&ret->st_mutex);
>> + nfs4_put_stid(&ret->st_stid);
>> + goto retry;
>> + }
>> + }
>> +
>> return ret;
>> }
>>
>> @@ -3566,7 +3579,6 @@ init_open_stateid(struct nfs4_file *fp, struct
>> nfsd4_open *open)
>> mutex_lock(&stp->st_mutex);
>>
>> spin_lock(&oo->oo_owner.so_client->cl_lock);
>> - spin_lock(&fp->fi_lock);
>>
>> retstp = nfsd4_find_existing_open(fp, open);
>> if (retstp)
>> @@ -3583,13 +3595,13 @@ init_open_stateid(struct nfs4_file *fp,
>> struct nfsd4_open *open)
>> stp->st_deny_bmap = 0;
>> stp->st_openstp = NULL;
>> list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
>> + spin_lock(&fp->fi_lock);
>> list_add(&stp->st_perfile, &fp->fi_stateids);
>> + spin_unlock(&fp->fi_lock);
>>
>> out_unlock:
>> - spin_unlock(&fp->fi_lock);
>> spin_unlock(&oo->oo_owner.so_client->cl_lock);
>> if (retstp) {
>> - mutex_lock(&retstp->st_mutex);
>> /* To keep mutex tracking happy */
>> mutex_unlock(&stp->st_mutex);
>> stp = retstp;
>> @@ -4403,9 +4415,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp,
>> struct svc_fh *current_fh, struct nf
>> status = nfs4_check_deleg(cl, open, &dp);
>> if (status)
>> goto out;
>> - spin_lock(&fp->fi_lock);
>> stp = nfsd4_find_existing_open(fp, open);
>> - spin_unlock(&fp->fi_lock);
>> } else {
>> open->op_file = NULL;
>> status = nfserr_bad_stateid;
>> @@ -4419,7 +4429,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp,
>> struct svc_fh *current_fh, struct nf
>> */
>> if (stp) {
>> /* Stateid was found, this is an OPEN upgrade */
>> - mutex_lock(&stp->st_mutex);
>> status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
>> if (status) {
>> mutex_unlock(&stp->st_mutex);
>> @@ -5294,7 +5303,6 @@ static void nfsd4_close_open_stateid(struct
>> nfs4_ol_stateid *s)
>> bool unhashed;
>> LIST_HEAD(reaplist);
>>
>> - s->st_stid.sc_type = NFS4_CLOSED_STID;
>> spin_lock(&clp->cl_lock);
>> unhashed = unhash_open_stateid(s, &reaplist);
>>
>> @@ -5335,6 +5343,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct
>> nfsd4_compound_state *cstate,
>> if (status)
>> goto out;
>> nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
>> + stp->st_stid.sc_type = NFS4_CLOSED_STID;
>> mutex_unlock(&stp->st_mutex);
>>
>> nfsd4_close_open_stateid(stp);
>> --
>> 2.9.3
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs"
> in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html