2014-07-10 18:07:44

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 00/11] nfsd: deny mode handling overhaul

Here is the deny mode handling overhaul broken out into a separate
series, as requested by Christoph. I've tried to address most of
his review comments, but I may have missed some. Let me know if
if I did.

Part of the client_mutex removal series involves cleaning up the
handling of deny modes in the server in order to setting and
enforcement atomic.

This does add a new per-nfs4_file spinlock, but in principle it
should never be contended while the client_mutex is still wrapped
around all of this code.

Jeff Layton (8):
nfsd: refactor nfs4_file_get_access and nfs4_file_put_access
nfsd: remove nfs4_file_put_fd
nfsd: shrink st_access_bmap and st_deny_bmap
nfsd: set stateid access and deny bits in nfs4_get_vfs_file
nfsd: clean up reset_union_bmap_deny
nfsd: always hold the fi_lock when bumping fi_access refcounts
nfsd: make deny mode enforcement more efficient and close races in it
nfsd: cleanup and rename nfs4_check_open

Trond Myklebust (3):
nfsd: Add fine grained protection for the nfs4_file->fi_stateids list
nfsd: Add locking to the nfs4_file->fi_fds[] array
nfsd: clean up helper __release_lock_stateid

fs/nfsd/nfs4state.c | 437 ++++++++++++++++++++++++++++++++++++++--------------
fs/nfsd/state.h | 32 +---
2 files changed, 325 insertions(+), 144 deletions(-)

--
1.9.3



2014-07-10 18:07:47

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 02/11] nfsd: Add locking to the nfs4_file->fi_fds[] array

From: Trond Myklebust <[email protected]>

Preparation for removal of the client_mutex, which currently protects
this array. While we don't actually need the find_*_file_locked variants
just yet, a later patch will. So go ahead and add them now to reduce
future churn in this code.

Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: Jeff Layton <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
fs/nfsd/nfs4state.c | 134 +++++++++++++++++++++++++++++++++++++++++++++-------
fs/nfsd/state.h | 26 ----------
2 files changed, 118 insertions(+), 42 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index cfb10d060c83..314dc8061461 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -268,6 +268,79 @@ get_nfs4_file(struct nfs4_file *fi)
atomic_inc(&fi->fi_ref);
}

+static struct file *
+__nfs4_get_fd(struct nfs4_file *f, int oflag)
+{
+ if (f->fi_fds[oflag])
+ return get_file(f->fi_fds[oflag]);
+ return NULL;
+}
+
+static struct file *
+find_writeable_file_locked(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ lockdep_assert_held(&f->fi_lock);
+
+ ret = __nfs4_get_fd(f, O_WRONLY);
+ if (!ret)
+ ret = __nfs4_get_fd(f, O_RDWR);
+ return ret;
+}
+
+static struct file *
+find_writeable_file(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = find_writeable_file_locked(f);
+ spin_unlock(&f->fi_lock);
+
+ return ret;
+}
+
+static struct file *find_readable_file_locked(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ lockdep_assert_held(&f->fi_lock);
+
+ ret = __nfs4_get_fd(f, O_RDONLY);
+ if (!ret)
+ ret = __nfs4_get_fd(f, O_RDWR);
+ return ret;
+}
+
+static struct file *
+find_readable_file(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = find_readable_file_locked(f);
+ spin_unlock(&f->fi_lock);
+
+ return ret;
+}
+
+static struct file *
+find_any_file(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = __nfs4_get_fd(f, O_RDWR);
+ if (!ret) {
+ ret = __nfs4_get_fd(f, O_WRONLY);
+ if (!ret)
+ ret = __nfs4_get_fd(f, O_RDONLY);
+ }
+ spin_unlock(&f->fi_lock);
+ return ret;
+}
+
static int num_delegations;
unsigned long max_delegations;

@@ -316,20 +389,31 @@ static void nfs4_file_get_access(struct nfs4_file *fp, int oflag)
__nfs4_file_get_access(fp, oflag);
}

-static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
+static struct file *nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
{
- if (fp->fi_fds[oflag]) {
- fput(fp->fi_fds[oflag]);
- fp->fi_fds[oflag] = NULL;
- }
+ struct file *filp;
+
+ filp = fp->fi_fds[oflag];
+ fp->fi_fds[oflag] = NULL;
+ return filp;
}

static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
{
- if (atomic_dec_and_test(&fp->fi_access[oflag])) {
- nfs4_file_put_fd(fp, oflag);
+ might_lock(&fp->fi_lock);
+
+ if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
+ struct file *f1 = NULL;
+ struct file *f2 = NULL;
+
+ f1 = nfs4_file_put_fd(fp, oflag);
if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
- nfs4_file_put_fd(fp, O_RDWR);
+ f2 = nfs4_file_put_fd(fp, O_RDWR);
+ spin_unlock(&fp->fi_lock);
+ if (f1)
+ fput(f1);
+ if (f2)
+ fput(f2);
}
}

@@ -737,8 +821,10 @@ static void __release_lock_stateid(struct nfs4_ol_stateid *stp)
unhash_generic_stateid(stp);
unhash_stid(&stp->st_stid);
file = find_any_file(stp->st_file);
- if (file)
+ if (file) {
locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
+ fput(file);
+ }
close_generic_stateid(stp);
free_generic_stateid(stp);
}
@@ -3206,17 +3292,27 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
struct svc_fh *cur_fh, struct nfsd4_open *open)
{
+ struct file *filp = NULL;
__be32 status;
int oflag = nfs4_access_to_omode(open->op_share_access);
int access = nfs4_access_to_access(open->op_share_access);

+ spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
- status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
- &fp->fi_fds[oflag]);
+ spin_unlock(&fp->fi_lock);
+ status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
if (status)
goto out;
+ spin_lock(&fp->fi_lock);
+ if (!fp->fi_fds[oflag]) {
+ fp->fi_fds[oflag] = filp;
+ filp = NULL;
+ }
}
nfs4_file_get_access(fp, oflag);
+ spin_unlock(&fp->fi_lock);
+ if (filp)
+ fput(filp);

status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
@@ -3301,13 +3397,15 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
if (status)
goto out_free;
fp->fi_lease = fl;
- fp->fi_deleg_file = get_file(fl->fl_file);
+ fp->fi_deleg_file = fl->fl_file;
atomic_set(&fp->fi_delegees, 1);
spin_lock(&state_lock);
hash_delegation_locked(dp, fp);
spin_unlock(&state_lock);
return 0;
out_free:
+ if (fl->fl_file)
+ fput(fl->fl_file);
locks_free_lock(fl);
return status;
}
@@ -3905,6 +4003,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
status = nfserr_serverfault;
goto out;
}
+ get_file(file);
}
break;
case NFS4_OPEN_STID:
@@ -3932,7 +4031,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
}
status = nfs_ok;
if (file)
- *filpp = get_file(file);
+ *filpp = file;
out:
nfs4_unlock_state();
return status;
@@ -4653,6 +4752,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
}
out:
+ if (filp)
+ fput(filp);
if (status && new_state)
release_lock_stateid(lock_stp);
nfsd4_bump_seqid(cstate, status);
@@ -4793,7 +4894,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
status = nfserr_jukebox;
- goto out;
+ goto fput;
}
locks_init_lock(file_lock);
file_lock->fl_type = F_UNLCK;
@@ -4815,7 +4916,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
-
+fput:
+ fput(filp);
out:
nfsd4_bump_seqid(cstate, status);
if (!cstate->replay_owner)
@@ -4826,7 +4928,7 @@ out:

out_nfserr:
status = nfserrno(err);
- goto out;
+ goto fput;
}

/*
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 04737b3ed363..9f1159d5de56 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -398,32 +398,6 @@ struct nfs4_file {
bool fi_had_conflict;
};

-/* XXX: for first cut may fall back on returning file that doesn't work
- * at all? */
-static inline struct file *find_writeable_file(struct nfs4_file *f)
-{
- if (f->fi_fds[O_WRONLY])
- return f->fi_fds[O_WRONLY];
- return f->fi_fds[O_RDWR];
-}
-
-static inline struct file *find_readable_file(struct nfs4_file *f)
-{
- if (f->fi_fds[O_RDONLY])
- return f->fi_fds[O_RDONLY];
- return f->fi_fds[O_RDWR];
-}
-
-static inline struct file *find_any_file(struct nfs4_file *f)
-{
- if (f->fi_fds[O_RDWR])
- return f->fi_fds[O_RDWR];
- else if (f->fi_fds[O_WRONLY])
- return f->fi_fds[O_WRONLY];
- else
- return f->fi_fds[O_RDONLY];
-}
-
/* "ol" stands for "Open or Lock". Better suggestions welcome. */
struct nfs4_ol_stateid {
struct nfs4_stid st_stid; /* must be first field */
--
1.9.3


2014-07-10 18:07:57

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 08/11] nfsd: clean up reset_union_bmap_deny

Fix the "deny" argument type, and start the loop at 1. The 0 iteration
is always a noop.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0a54fc956463..5f7294712ad4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4266,10 +4266,11 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
}

static void
-reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp)
+reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
int i;
- for (i = 0; i < 4; i++) {
+
+ for (i = 1; i < 4; i++) {
if ((i & deny) != i)
clear_deny(i, stp);
}
--
1.9.3


2014-07-14 13:38:20

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Sun, Jul 13, 2014 at 07:52:50AM -0400, Jeff Layton wrote:
> On Sun, 13 Jul 2014 04:42:44 -0700
> Christoph Hellwig <[email protected]> wrote:
>
> > On Fri, Jul 11, 2014 at 11:42:27AM -0400, Jeff Layton wrote:
> > > Yes, thanks to both of you. The review and merging are much
> > > appreciated. Only 89 patches to go once these go in!
> >
> > Might help if we can get them into slighly smaller reviewable chunks,
> > e.g. the next batch might be the stateid refcounting and everything
> > around it, deferring the various client and session related bits and the
> > fault injection changes for now.
>
> That should be doable as long as we're ok with adding in superfluous
> spinlocking until the client_mutex is removed.

It means that an attempt to bisect any race introduced by the series
will land on the one patch that drops the state lock.

But maybe that's the best we can do.

--b.

> I'll work on breaking
> out the next set. It might be a few days as I'm still looking at
> whether it's feasible to not alter the sc_type like the code does today.

2014-07-11 17:31:35

by Frank Filz

[permalink] [raw]
Subject: RE: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

> The current enforcement of deny modes is both inefficient and scattered
> across several places, which makes it hard to guarantee atomicity. The
> inefficiency is a problem now, and the lack of atomicity will mean races
once
> the client_mutex is removed.
>
> First, we address the inefficiency. We have to track deny modes on a per-
> stateid basis to ensure that open downgrades are sane, but when the server
> goes to enforce them it has to walk the entire list of stateids and check
> against each one.
>
> Instead of doing that, maintain a per-nfs4_file deny mode. When a file is
> opened, we simply set any deny bits in that mode that were specified in
the
> OPEN call. We can then use that unified deny mode to do a simple check to
> see whether there are any conflicts without needing to walk the entire
> stateid list.
>
> The only time we'll need to walk the entire list of stateids is when a
stateid
> that has a deny mode on it is being released, or one is having its deny
mode
> downgraded. In that case, we must walk the entire list and recalculate the
> fi_share_deny field. Since deny modes are pretty rare today, this should
be
> very rare under normal workloads.

What we do in Ganesha to avoid walking the list of stateids on release is
maintain the effective deny (and access) mode not at bits, but as a counter
for each bit. Thus, to remove a SHARE_ACCESS_READ | SHARE_DENY_WRITE, you
decrement the counts for access_read and deny_write.

Frank



2014-07-11 15:42:29

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Fri, 11 Jul 2014 10:31:28 -0400
"J. Bruce Fields" <[email protected]> wrote:

> On Fri, Jul 11, 2014 at 12:46:56AM -0700, Christoph Hellwig wrote:
> > On Thu, Jul 10, 2014 at 04:14:48PM -0400, J. Bruce Fields wrote:
> > > On Thu, Jul 10, 2014 at 02:07:24PM -0400, Jeff Layton wrote:
> > > > Here is the deny mode handling overhaul broken out into a separate
> > > > series, as requested by Christoph. I've tried to address most of
> > > > his review comments, but I may have missed some. Let me know if
> > > > if I did.
> > >
> > > They look fine to me; I'll merge them soon absent any objections from
> > > Christoph.
> >
> > The whole series looks good, although I've not tested it yet, just the
> > original full series.
> >
> > Reviewed-by: Christoph Hellwig <[email protected]>
>
> Thanks!
>

Yes, thanks to both of you. The review and merging are much
appreciated. Only 89 patches to go once these go in!

--
Jeff Layton <[email protected]>

2014-07-11 17:56:35

by Frank Filz

[permalink] [raw]
Subject: RE: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

> On Fri, 11 Jul 2014 10:31:26 -0700
> "Frank Filz" <[email protected]> wrote:
>
> > > The current enforcement of deny modes is both inefficient and
> > > scattered across several places, which makes it hard to guarantee
> > > atomicity. The inefficiency is a problem now, and the lack of
> > > atomicity will mean races
> > once
> > > the client_mutex is removed.
> > >
> > > First, we address the inefficiency. We have to track deny modes on a
> > > per- stateid basis to ensure that open downgrades are sane, but when
> > > the server goes to enforce them it has to walk the entire list of
> > > stateids and check against each one.
> > >
> > > Instead of doing that, maintain a per-nfs4_file deny mode. When a
> > > file is opened, we simply set any deny bits in that mode that were
> > > specified in
> > the
> > > OPEN call. We can then use that unified deny mode to do a simple
> > > check to see whether there are any conflicts without needing to walk
> > > the entire stateid list.
> > >
> > > The only time we'll need to walk the entire list of stateids is when
> > > a
> > stateid
> > > that has a deny mode on it is being released, or one is having its
> > > deny
> > mode
> > > downgraded. In that case, we must walk the entire list and
> > > recalculate the fi_share_deny field. Since deny modes are pretty
> > > rare today, this should
> > be
> > > very rare under normal workloads.
> >
> > What we do in Ganesha to avoid walking the list of stateids on release
> > is maintain the effective deny (and access) mode not at bits, but as a
> > counter for each bit. Thus, to remove a SHARE_ACCESS_READ |
> > SHARE_DENY_WRITE, you decrement the counts for access_read and
> deny_write.
> >
> > Frank
> >
> >
>
> Sure, that's another possibility that I considered, but I didn't want to
be
> bothered with having to add counters for deny modes. In practice there are
> *no* clients that use them (aside from pynfs and maybe the semi-mythical
> Windows v4.1 client).
>
> With this scheme, deny mode enforcement is pretty darned efficient,
> particularly in the common case where there are no deny modes to enforce.
>
> Any penalty for the use of deny modes is generally paid during the CLOSE
or
> OPEN_DOWNGRADE on behalf of the client that's using them.
> Any RPC from a client that's not won't need to do any extra work (aside
from
> maybe spinning on the fi_lock while another client is having to
recalculate the
> fi_share_deny).

Good point.

Whatever happened to Pavel Shilovsky's O_DENY patch set? I was looking
forward to that for allowing Ganesha and Samba share reservations to more
fully interact with each other...

Frank



2014-07-13 11:52:52

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Sun, 13 Jul 2014 04:42:44 -0700
Christoph Hellwig <[email protected]> wrote:

> On Fri, Jul 11, 2014 at 11:42:27AM -0400, Jeff Layton wrote:
> > Yes, thanks to both of you. The review and merging are much
> > appreciated. Only 89 patches to go once these go in!
>
> Might help if we can get them into slighly smaller reviewable chunks,
> e.g. the next batch might be the stateid refcounting and everything
> around it, deferring the various client and session related bits and the
> fault injection changes for now.

That should be doable as long as we're ok with adding in superfluous
spinlocking until the client_mutex is removed. I'll work on breaking
out the next set. It might be a few days as I'm still looking at
whether it's feasible to not alter the sc_type like the code does today.

--
Jeff Layton <[email protected]>

2014-07-10 20:08:14

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

On Thu, Jul 10, 2014 at 02:07:34PM -0400, Jeff Layton wrote:
> The current enforcement of deny modes is both inefficient and scattered
> across several places, which makes it hard to guarantee atomicity. The
> inefficiency is a problem now, and the lack of atomicity will mean races
> once the client_mutex is removed.
>
> First, we address the inefficiency. We have to track deny modes on a
> per-stateid basis to ensure that open downgrades are sane, but when the
> server goes to enforce them it has to walk the entire list of stateids
> and check against each one.
>
> Instead of doing that, maintain a per-nfs4_file deny mode. When a file
> is opened, we simply set any deny bits in that mode that were specified
> in the OPEN call. We can then use that unified deny mode to do a simple
> check to see whether there are any conflicts without needing to walk the
> entire stateid list.
>
> The only time we'll need to walk the entire list of stateids is when a
> stateid that has a deny mode on it is being released, or one is having
> its deny mode downgraded. In that case, we must walk the entire list and
> recalculate the fi_share_deny field. Since deny modes are pretty rare
> today, this should be very rare under normal workloads.

Yeah, we only care about the bare minimum correctness for deny modes.
Looks good!

--b.

>
> To address the potential for races once the client_mutex is removed,
> protect fi_share_deny with the fi_lock. In nfs4_get_vfs_file, check to
> make sure that any deny mode we want to apply won't conflict with
> existing access. If that's ok, then have nfs4_file_get_access check that
> new access to the file won't conflict with existing deny modes.
>
> If that also passes, then get file access references, set the correct
> access and deny bits in the stateid, and update the fi_share_deny field.
> If opening the file or truncating it fails, then unwind the whole mess
> and return the appropriate error.
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 182 +++++++++++++++++++++++++++++++++++-----------------
> fs/nfsd/state.h | 1 +
> 2 files changed, 125 insertions(+), 58 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 8f320f2f8b84..da88b31c0afe 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -394,10 +394,33 @@ nfs4_file_get_access(struct nfs4_file *fp, u32 access)
> if (access & ~NFS4_SHARE_ACCESS_BOTH)
> return nfserr_inval;
>
> + /* Does it conflict with a deny mode already set? */
> + if ((access & fp->fi_share_deny) != 0)
> + return nfserr_share_denied;
> +
> __nfs4_file_get_access(fp, access);
> return nfs_ok;
> }
>
> +static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
> +{
> + /* Common case is that there is no deny mode. */
> + if (deny) {
> + /* Does this deny mode make sense? */
> + if (deny & ~NFS4_SHARE_DENY_BOTH)
> + return nfserr_inval;
> +
> + if ((deny & NFS4_SHARE_DENY_READ) &&
> + atomic_read(&fp->fi_access[O_RDONLY]))
> + return nfserr_share_denied;
> +
> + if ((deny & NFS4_SHARE_DENY_WRITE) &&
> + atomic_read(&fp->fi_access[O_WRONLY]))
> + return nfserr_share_denied;
> + }
> + return nfs_ok;
> +}
> +
> static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
> {
> might_lock(&fp->fi_lock);
> @@ -710,17 +733,6 @@ bmap_to_share_mode(unsigned long bmap) {
> return access;
> }
>
> -static bool
> -test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
> - unsigned int access, deny;
> -
> - access = bmap_to_share_mode(stp->st_access_bmap);
> - deny = bmap_to_share_mode(stp->st_deny_bmap);
> - if ((access & open->op_share_deny) || (deny & open->op_share_access))
> - return false;
> - return true;
> -}
> -
> /* set share access for a given stateid */
> static inline void
> set_access(u32 access, struct nfs4_ol_stateid *stp)
> @@ -793,11 +805,49 @@ static int nfs4_access_to_omode(u32 access)
> return O_RDONLY;
> }
>
> +/*
> + * A stateid that had a deny mode associated with it is being released
> + * or downgraded. Recalculate the deny mode on the file.
> + */
> +static void
> +recalculate_deny_mode(struct nfs4_file *fp)
> +{
> + struct nfs4_ol_stateid *stp;
> +
> + spin_lock(&fp->fi_lock);
> + fp->fi_share_deny = 0;
> + list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
> + fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
> + spin_unlock(&fp->fi_lock);
> +}
> +
> +static void
> +reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
> +{
> + int i;
> + bool change = false;
> +
> + for (i = 1; i < 4; i++) {
> + if ((i & deny) != i) {
> + change = true;
> + clear_deny(i, stp);
> + }
> + }
> +
> + /* Recalculate per-file deny mode if there was a change */
> + if (change)
> + recalculate_deny_mode(stp->st_file);
> +}
> +
> /* release all access and file references for a given stateid */
> static void
> release_all_access(struct nfs4_ol_stateid *stp)
> {
> int i;
> + struct nfs4_file *fp = stp->st_file;
> +
> + if (fp && stp->st_deny_bmap != 0)
> + recalculate_deny_mode(fp);
>
> for (i = 1; i < 4; i++) {
> if (test_access(i, stp))
> @@ -2787,6 +2837,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
> fp->fi_inode = ino;
> fp->fi_had_conflict = false;
> fp->fi_lease = NULL;
> + fp->fi_share_deny = 0;
> memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
> memset(fp->fi_access, 0, sizeof(fp->fi_access));
> hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
> @@ -3014,22 +3065,15 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
> {
> struct inode *ino = current_fh->fh_dentry->d_inode;
> struct nfs4_file *fp;
> - struct nfs4_ol_stateid *stp;
> - __be32 ret;
> + __be32 ret = nfs_ok;
>
> fp = find_file(ino);
> if (!fp)
> - return nfs_ok;
> - ret = nfserr_locked;
> - /* Search for conflicting share reservations */
> + return ret;
> + /* Check for conflicting share reservations */
> spin_lock(&fp->fi_lock);
> - list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
> - if (test_deny(deny_type, stp) ||
> - test_deny(NFS4_SHARE_DENY_BOTH, stp))
> - goto out;
> - }
> - ret = nfs_ok;
> -out:
> + if (fp->fi_share_deny & deny_type)
> + ret = nfserr_locked;
> spin_unlock(&fp->fi_lock);
> put_nfs4_file(fp);
> return ret;
> @@ -3265,12 +3309,9 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st
> if (local->st_stateowner->so_is_open_owner == 0)
> continue;
> /* remember if we have seen this open owner */
> - if (local->st_stateowner == &oo->oo_owner)
> + if (local->st_stateowner == &oo->oo_owner) {
> *stpp = local;
> - /* check for conflicting share reservations */
> - if (!test_share(local, open)) {
> - spin_unlock(&fp->fi_lock);
> - return nfserr_share_denied;
> + break;
> }
> }
> spin_unlock(&fp->fi_lock);
> @@ -3311,56 +3352,91 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
> __be32 status;
> int oflag = nfs4_access_to_omode(open->op_share_access);
> int access = nfs4_access_to_access(open->op_share_access);
> + unsigned char old_access_bmap, old_deny_bmap;
>
> spin_lock(&fp->fi_lock);
> +
> + /*
> + * Are we trying to set a deny mode that would conflict with
> + * current access?
> + */
> + status = nfs4_file_check_deny(fp, open->op_share_deny);
> + if (status != nfs_ok) {
> + spin_unlock(&fp->fi_lock);
> + goto out;
> + }
> +
> + /* set access to the file */
> + status = nfs4_file_get_access(fp, open->op_share_access);
> + if (status != nfs_ok) {
> + spin_unlock(&fp->fi_lock);
> + goto out;
> + }
> +
> + /* Set access bits in stateid */
> + old_access_bmap = stp->st_access_bmap;
> + set_access(open->op_share_access, stp);
> +
> + /* Set new deny mask */
> + old_deny_bmap = stp->st_deny_bmap;
> + set_deny(open->op_share_deny, stp);
> + fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
> +
> if (!fp->fi_fds[oflag]) {
> spin_unlock(&fp->fi_lock);
> status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
> if (status)
> - goto out;
> + goto out_put_access;
> spin_lock(&fp->fi_lock);
> if (!fp->fi_fds[oflag]) {
> fp->fi_fds[oflag] = filp;
> filp = NULL;
> }
> }
> - status = nfs4_file_get_access(fp, open->op_share_access);
> spin_unlock(&fp->fi_lock);
> if (filp)
> fput(filp);
> - if (status)
> - goto out_put_access;
>
> status = nfsd4_truncate(rqstp, cur_fh, open);
> if (status)
> goto out_put_access;
> -
> - /* Set access and deny bits in stateid */
> - set_access(open->op_share_access, stp);
> - set_deny(open->op_share_deny, stp);
> - return nfs_ok;
> -
> -out_put_access:
> - nfs4_file_put_access(fp, open->op_share_access);
> out:
> return status;
> +out_put_access:
> + stp->st_access_bmap = old_access_bmap;
> + nfs4_file_put_access(fp, open->op_share_access);
> + reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
> + goto out;
> }
>
> static __be32
> nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
> {
> __be32 status;
> + unsigned char old_deny_bmap;
>
> if (!test_access(open->op_share_access, stp))
> - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
> - else
> - status = nfsd4_truncate(rqstp, cur_fh, open);
> + return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
>
> - if (status)
> + /* test and set deny mode */
> + spin_lock(&fp->fi_lock);
> + status = nfs4_file_check_deny(fp, open->op_share_deny);
> + if (status == nfs_ok) {
> + old_deny_bmap = stp->st_deny_bmap;
> + set_deny(open->op_share_deny, stp);
> + fp->fi_share_deny |=
> + (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
> + }
> + spin_unlock(&fp->fi_lock);
> +
> + if (status != nfs_ok)
> return status;
> - return nfs_ok;
> -}
>
> + status = nfsd4_truncate(rqstp, cur_fh, open);
> + if (status != nfs_ok)
> + reset_union_bmap_deny(old_deny_bmap, stp);
> + return status;
> +}
>
> static void
> nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
> @@ -3582,7 +3658,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> */
> fp = find_or_add_file(ino, open->op_file);
> if (fp != open->op_file) {
> - if ((status = nfs4_check_open(fp, open, &stp)))
> + status = nfs4_check_open(fp, open, &stp);
> + if (status)
> goto out;
> status = nfs4_check_deleg(cl, open, &dp);
> if (status)
> @@ -4269,17 +4346,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
> }
> }
>
> -static void
> -reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
> -{
> - int i;
> -
> - for (i = 1; i < 4; i++) {
> - if ((i & deny) != i)
> - clear_deny(i, stp);
> - }
> -}
> -
> __be32
> nfsd4_open_downgrade(struct svc_rqst *rqstp,
> struct nfsd4_compound_state *cstate,
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 72aee4b4f1ae..015b972da8ba 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -391,6 +391,7 @@ struct nfs4_file {
> * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set.
> */
> atomic_t fi_access[2];
> + u32 fi_share_deny;
> struct file *fi_deleg_file;
> struct file_lock *fi_lease;
> atomic_t fi_delegees;
> --
> 1.9.3
>

2014-07-11 14:31:30

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Fri, Jul 11, 2014 at 12:46:56AM -0700, Christoph Hellwig wrote:
> On Thu, Jul 10, 2014 at 04:14:48PM -0400, J. Bruce Fields wrote:
> > On Thu, Jul 10, 2014 at 02:07:24PM -0400, Jeff Layton wrote:
> > > Here is the deny mode handling overhaul broken out into a separate
> > > series, as requested by Christoph. I've tried to address most of
> > > his review comments, but I may have missed some. Let me know if
> > > if I did.
> >
> > They look fine to me; I'll merge them soon absent any objections from
> > Christoph.
>
> The whole series looks good, although I've not tested it yet, just the
> original full series.
>
> Reviewed-by: Christoph Hellwig <[email protected]>

Thanks!

--b.

2014-07-11 18:09:06

by Frank Filz

[permalink] [raw]
Subject: RE: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

> On Fri, Jul 11, 2014 at 1:56 PM, Frank Filz <[email protected]> wrote:
> >> On Fri, 11 Jul 2014 10:31:26 -0700
> >> "Frank Filz" <[email protected]> wrote:
> >>
> >> > > The current enforcement of deny modes is both inefficient and
> >> > > scattered across several places, which makes it hard to guarantee
> >> > > atomicity. The inefficiency is a problem now, and the lack of
> >> > > atomicity will mean races
> >> > once
> >> > > the client_mutex is removed.
> >> > >
> >> > > First, we address the inefficiency. We have to track deny modes
> >> > > on a
> >> > > per- stateid basis to ensure that open downgrades are sane, but
> >> > > when the server goes to enforce them it has to walk the entire
> >> > > list of stateids and check against each one.
> >> > >
> >> > > Instead of doing that, maintain a per-nfs4_file deny mode. When a
> >> > > file is opened, we simply set any deny bits in that mode that
> >> > > were specified in
> >> > the
> >> > > OPEN call. We can then use that unified deny mode to do a simple
> >> > > check to see whether there are any conflicts without needing to
> >> > > walk the entire stateid list.
> >> > >
> >> > > The only time we'll need to walk the entire list of stateids is
> >> > > when a
> >> > stateid
> >> > > that has a deny mode on it is being released, or one is having
> >> > > its deny
> >> > mode
> >> > > downgraded. In that case, we must walk the entire list and
> >> > > recalculate the fi_share_deny field. Since deny modes are pretty
> >> > > rare today, this should
> >> > be
> >> > > very rare under normal workloads.
> >> >
> >> > What we do in Ganesha to avoid walking the list of stateids on
> >> > release is maintain the effective deny (and access) mode not at
> >> > bits, but as a counter for each bit. Thus, to remove a
> >> > SHARE_ACCESS_READ | SHARE_DENY_WRITE, you decrement the
> counts for
> >> > access_read and
> >> deny_write.
> >> >
> >> > Frank
> >> >
> >> >
> >>
> >> Sure, that's another possibility that I considered, but I didn't want
> >> to
> > be
> >> bothered with having to add counters for deny modes. In practice
> >> there are
> >> *no* clients that use them (aside from pynfs and maybe the
> >> semi-mythical Windows v4.1 client).
>
> You don't need counters for deny modes. There can only be 1 of each, since
> any deny mode has to be part of an OPEN that has at least one non-zero
> share access mode. So a single bit for each should be fine.

You can have any number of:

OPEN SHARE_ACCESS_READ, SHARE_DENY_WRITE...

It's the share reservation equivalent of a read lock...

Frank

> >>
> >> With this scheme, deny mode enforcement is pretty darned efficient,
> >> particularly in the common case where there are no deny modes to
> enforce.
> >>
> >> Any penalty for the use of deny modes is generally paid during the
> >> CLOSE
> > or
> >> OPEN_DOWNGRADE on behalf of the client that's using them.
> >> Any RPC from a client that's not won't need to do any extra work
> >> (aside
> > from
> >> maybe spinning on the fi_lock while another client is having to
> > recalculate the
> >> fi_share_deny).
> >
> > Good point.
> >
> > Whatever happened to Pavel Shilovsky's O_DENY patch set? I was looking
> > forward to that for allowing Ganesha and Samba share reservations to
> > more fully interact with each other...
> >
> > Frank
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-nfs"
> > in the body of a message to [email protected] More
> majordomo
> > info at http://vger.kernel.org/majordomo-info.html
>
>
>
> --
> Trond Myklebust
>
> Linux NFS client maintainer, PrimaryData
>
> [email protected]


2014-07-15 10:00:39

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Mon, Jul 14, 2014 at 09:38:10AM -0400, J. Bruce Fields wrote:
> > That should be doable as long as we're ok with adding in superfluous
> > spinlocking until the client_mutex is removed.
>
> It means that an attempt to bisect any race introduced by the series
> will land on the one patch that drops the state lock.
>
> But maybe that's the best we can do.

That's pretty much exactly how all the earlier series are structured as
well.


2014-07-11 07:46:58

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Thu, Jul 10, 2014 at 04:14:48PM -0400, J. Bruce Fields wrote:
> On Thu, Jul 10, 2014 at 02:07:24PM -0400, Jeff Layton wrote:
> > Here is the deny mode handling overhaul broken out into a separate
> > series, as requested by Christoph. I've tried to address most of
> > his review comments, but I may have missed some. Let me know if
> > if I did.
>
> They look fine to me; I'll merge them soon absent any objections from
> Christoph.

The whole series looks good, although I've not tested it yet, just the
original full series.

Reviewed-by: Christoph Hellwig <[email protected]>

2014-07-10 18:07:53

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 06/11] nfsd: shrink st_access_bmap and st_deny_bmap

We never use anything above bit #3, so an unsigned long for each is
wasteful. Shrink them to a char each, and add some WARN_ON_ONCE calls if
we try to set or clear bits that would go outside those sizes.

Note too that because atomic bitops work on unsigned longs, we have to
abandon their use here. That shouldn't be a problem though since we
don't really care about the atomicity in this code anyway. Using them
was just a convenient way to flip bits.

Signed-off-by: Jeff Layton <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
fs/nfsd/nfs4state.c | 38 +++++++++++++++++++++++++++-----------
fs/nfsd/state.h | 4 ++--
2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c02bad6d7e90..f7f11631c26c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -721,42 +721,58 @@ test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
static inline void
set_access(u32 access, struct nfs4_ol_stateid *stp)
{
- __set_bit(access, &stp->st_access_bmap);
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap |= mask;
}

/* clear share access for a given stateid */
static inline void
clear_access(u32 access, struct nfs4_ol_stateid *stp)
{
- __clear_bit(access, &stp->st_access_bmap);
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap &= ~mask;
}

/* test whether a given stateid has access */
static inline bool
test_access(u32 access, struct nfs4_ol_stateid *stp)
{
- return test_bit(access, &stp->st_access_bmap);
+ unsigned char mask = 1 << access;
+
+ return (bool)(stp->st_access_bmap & mask);
}

/* set share deny for a given stateid */
static inline void
-set_deny(u32 access, struct nfs4_ol_stateid *stp)
+set_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
- __set_bit(access, &stp->st_deny_bmap);
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap |= mask;
}

/* clear share deny for a given stateid */
static inline void
-clear_deny(u32 access, struct nfs4_ol_stateid *stp)
+clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
- __clear_bit(access, &stp->st_deny_bmap);
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap &= ~mask;
}

/* test whether a given stateid is denying specific access */
static inline bool
-test_deny(u32 access, struct nfs4_ol_stateid *stp)
+test_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
- return test_bit(access, &stp->st_deny_bmap);
+ unsigned char mask = 1 << deny;
+
+ return (bool)(stp->st_deny_bmap & mask);
}

static int nfs4_access_to_omode(u32 access)
@@ -4282,12 +4298,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
goto out;
status = nfserr_inval;
if (!test_access(od->od_share_access, stp)) {
- dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n",
+ dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n",
stp->st_access_bmap, od->od_share_access);
goto out;
}
if (!test_deny(od->od_share_deny, stp)) {
- dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
+ dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n",
stp->st_deny_bmap, od->od_share_deny);
goto out;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9f1159d5de56..72aee4b4f1ae 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -406,8 +406,8 @@ struct nfs4_ol_stateid {
struct list_head st_locks;
struct nfs4_stateowner * st_stateowner;
struct nfs4_file * st_file;
- unsigned long st_access_bmap;
- unsigned long st_deny_bmap;
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
struct nfs4_ol_stateid * st_openstp;
};

--
1.9.3


2014-07-11 18:07:38

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

On Fri, 11 Jul 2014 14:00:43 -0400
Trond Myklebust <[email protected]> wrote:

> On Fri, Jul 11, 2014 at 1:56 PM, Frank Filz <[email protected]> wrote:
> >> On Fri, 11 Jul 2014 10:31:26 -0700
> >> "Frank Filz" <[email protected]> wrote:
> >>
> >> > > The current enforcement of deny modes is both inefficient and
> >> > > scattered across several places, which makes it hard to guarantee
> >> > > atomicity. The inefficiency is a problem now, and the lack of
> >> > > atomicity will mean races
> >> > once
> >> > > the client_mutex is removed.
> >> > >
> >> > > First, we address the inefficiency. We have to track deny modes on a
> >> > > per- stateid basis to ensure that open downgrades are sane, but when
> >> > > the server goes to enforce them it has to walk the entire list of
> >> > > stateids and check against each one.
> >> > >
> >> > > Instead of doing that, maintain a per-nfs4_file deny mode. When a
> >> > > file is opened, we simply set any deny bits in that mode that were
> >> > > specified in
> >> > the
> >> > > OPEN call. We can then use that unified deny mode to do a simple
> >> > > check to see whether there are any conflicts without needing to walk
> >> > > the entire stateid list.
> >> > >
> >> > > The only time we'll need to walk the entire list of stateids is when
> >> > > a
> >> > stateid
> >> > > that has a deny mode on it is being released, or one is having its
> >> > > deny
> >> > mode
> >> > > downgraded. In that case, we must walk the entire list and
> >> > > recalculate the fi_share_deny field. Since deny modes are pretty
> >> > > rare today, this should
> >> > be
> >> > > very rare under normal workloads.
> >> >
> >> > What we do in Ganesha to avoid walking the list of stateids on release
> >> > is maintain the effective deny (and access) mode not at bits, but as a
> >> > counter for each bit. Thus, to remove a SHARE_ACCESS_READ |
> >> > SHARE_DENY_WRITE, you decrement the counts for access_read and
> >> deny_write.
> >> >
> >> > Frank
> >> >
> >> >
> >>
> >> Sure, that's another possibility that I considered, but I didn't want to
> > be
> >> bothered with having to add counters for deny modes. In practice there are
> >> *no* clients that use them (aside from pynfs and maybe the semi-mythical
> >> Windows v4.1 client).
>
> You don't need counters for deny modes. There can only be 1 of each,
> since any deny mode has to be part of an OPEN that has at least one
> non-zero share access mode. So a single bit for each should be fine.
>

I'm not sure that's 100% true.

I see no reason that you can't have several clients open the same file
with (e.g.) ACCESS_READ and DENY_WRITE. You wouldn't want to lift the
DENY_WRITE on the file until all of those clients have closed the file
(or downgraded to remove the DENY_WRITE access).

So, I think you do need a little more than just a single set of bits on
the file. You either need to use per-file counters for them there
(which it sounds like ganesha does) or track them on a per-stateid
basis (like knfsd does).

> >>
> >> With this scheme, deny mode enforcement is pretty darned efficient,
> >> particularly in the common case where there are no deny modes to enforce.
> >>
> >> Any penalty for the use of deny modes is generally paid during the CLOSE
> > or
> >> OPEN_DOWNGRADE on behalf of the client that's using them.
> >> Any RPC from a client that's not won't need to do any extra work (aside
> > from
> >> maybe spinning on the fi_lock while another client is having to
> > recalculate the
> >> fi_share_deny).
> >
> > Good point.
> >
> > Whatever happened to Pavel Shilovsky's O_DENY patch set? I was looking
> > forward to that for allowing Ganesha and Samba share reservations to more
> > fully interact with each other...
> >
> > Frank
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
>


--
Jeff Layton <[email protected]>

2014-07-10 18:07:58

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

The current enforcement of deny modes is both inefficient and scattered
across several places, which makes it hard to guarantee atomicity. The
inefficiency is a problem now, and the lack of atomicity will mean races
once the client_mutex is removed.

First, we address the inefficiency. We have to track deny modes on a
per-stateid basis to ensure that open downgrades are sane, but when the
server goes to enforce them it has to walk the entire list of stateids
and check against each one.

Instead of doing that, maintain a per-nfs4_file deny mode. When a file
is opened, we simply set any deny bits in that mode that were specified
in the OPEN call. We can then use that unified deny mode to do a simple
check to see whether there are any conflicts without needing to walk the
entire stateid list.

The only time we'll need to walk the entire list of stateids is when a
stateid that has a deny mode on it is being released, or one is having
its deny mode downgraded. In that case, we must walk the entire list and
recalculate the fi_share_deny field. Since deny modes are pretty rare
today, this should be very rare under normal workloads.

To address the potential for races once the client_mutex is removed,
protect fi_share_deny with the fi_lock. In nfs4_get_vfs_file, check to
make sure that any deny mode we want to apply won't conflict with
existing access. If that's ok, then have nfs4_file_get_access check that
new access to the file won't conflict with existing deny modes.

If that also passes, then get file access references, set the correct
access and deny bits in the stateid, and update the fi_share_deny field.
If opening the file or truncating it fails, then unwind the whole mess
and return the appropriate error.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 182 +++++++++++++++++++++++++++++++++++-----------------
fs/nfsd/state.h | 1 +
2 files changed, 125 insertions(+), 58 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8f320f2f8b84..da88b31c0afe 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -394,10 +394,33 @@ nfs4_file_get_access(struct nfs4_file *fp, u32 access)
if (access & ~NFS4_SHARE_ACCESS_BOTH)
return nfserr_inval;

+ /* Does it conflict with a deny mode already set? */
+ if ((access & fp->fi_share_deny) != 0)
+ return nfserr_share_denied;
+
__nfs4_file_get_access(fp, access);
return nfs_ok;
}

+static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
+{
+ /* Common case is that there is no deny mode. */
+ if (deny) {
+ /* Does this deny mode make sense? */
+ if (deny & ~NFS4_SHARE_DENY_BOTH)
+ return nfserr_inval;
+
+ if ((deny & NFS4_SHARE_DENY_READ) &&
+ atomic_read(&fp->fi_access[O_RDONLY]))
+ return nfserr_share_denied;
+
+ if ((deny & NFS4_SHARE_DENY_WRITE) &&
+ atomic_read(&fp->fi_access[O_WRONLY]))
+ return nfserr_share_denied;
+ }
+ return nfs_ok;
+}
+
static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
{
might_lock(&fp->fi_lock);
@@ -710,17 +733,6 @@ bmap_to_share_mode(unsigned long bmap) {
return access;
}

-static bool
-test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
- unsigned int access, deny;
-
- access = bmap_to_share_mode(stp->st_access_bmap);
- deny = bmap_to_share_mode(stp->st_deny_bmap);
- if ((access & open->op_share_deny) || (deny & open->op_share_access))
- return false;
- return true;
-}
-
/* set share access for a given stateid */
static inline void
set_access(u32 access, struct nfs4_ol_stateid *stp)
@@ -793,11 +805,49 @@ static int nfs4_access_to_omode(u32 access)
return O_RDONLY;
}

+/*
+ * A stateid that had a deny mode associated with it is being released
+ * or downgraded. Recalculate the deny mode on the file.
+ */
+static void
+recalculate_deny_mode(struct nfs4_file *fp)
+{
+ struct nfs4_ol_stateid *stp;
+
+ spin_lock(&fp->fi_lock);
+ fp->fi_share_deny = 0;
+ list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
+ fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
+ spin_unlock(&fp->fi_lock);
+}
+
+static void
+reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ int i;
+ bool change = false;
+
+ for (i = 1; i < 4; i++) {
+ if ((i & deny) != i) {
+ change = true;
+ clear_deny(i, stp);
+ }
+ }
+
+ /* Recalculate per-file deny mode if there was a change */
+ if (change)
+ recalculate_deny_mode(stp->st_file);
+}
+
/* release all access and file references for a given stateid */
static void
release_all_access(struct nfs4_ol_stateid *stp)
{
int i;
+ struct nfs4_file *fp = stp->st_file;
+
+ if (fp && stp->st_deny_bmap != 0)
+ recalculate_deny_mode(fp);

for (i = 1; i < 4; i++) {
if (test_access(i, stp))
@@ -2787,6 +2837,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
fp->fi_inode = ino;
fp->fi_had_conflict = false;
fp->fi_lease = NULL;
+ fp->fi_share_deny = 0;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
@@ -3014,22 +3065,15 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
{
struct inode *ino = current_fh->fh_dentry->d_inode;
struct nfs4_file *fp;
- struct nfs4_ol_stateid *stp;
- __be32 ret;
+ __be32 ret = nfs_ok;

fp = find_file(ino);
if (!fp)
- return nfs_ok;
- ret = nfserr_locked;
- /* Search for conflicting share reservations */
+ return ret;
+ /* Check for conflicting share reservations */
spin_lock(&fp->fi_lock);
- list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
- if (test_deny(deny_type, stp) ||
- test_deny(NFS4_SHARE_DENY_BOTH, stp))
- goto out;
- }
- ret = nfs_ok;
-out:
+ if (fp->fi_share_deny & deny_type)
+ ret = nfserr_locked;
spin_unlock(&fp->fi_lock);
put_nfs4_file(fp);
return ret;
@@ -3265,12 +3309,9 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st
if (local->st_stateowner->so_is_open_owner == 0)
continue;
/* remember if we have seen this open owner */
- if (local->st_stateowner == &oo->oo_owner)
+ if (local->st_stateowner == &oo->oo_owner) {
*stpp = local;
- /* check for conflicting share reservations */
- if (!test_share(local, open)) {
- spin_unlock(&fp->fi_lock);
- return nfserr_share_denied;
+ break;
}
}
spin_unlock(&fp->fi_lock);
@@ -3311,56 +3352,91 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
__be32 status;
int oflag = nfs4_access_to_omode(open->op_share_access);
int access = nfs4_access_to_access(open->op_share_access);
+ unsigned char old_access_bmap, old_deny_bmap;

spin_lock(&fp->fi_lock);
+
+ /*
+ * Are we trying to set a deny mode that would conflict with
+ * current access?
+ */
+ status = nfs4_file_check_deny(fp, open->op_share_deny);
+ if (status != nfs_ok) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+
+ /* set access to the file */
+ status = nfs4_file_get_access(fp, open->op_share_access);
+ if (status != nfs_ok) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
+ }
+
+ /* Set access bits in stateid */
+ old_access_bmap = stp->st_access_bmap;
+ set_access(open->op_share_access, stp);
+
+ /* Set new deny mask */
+ old_deny_bmap = stp->st_deny_bmap;
+ set_deny(open->op_share_deny, stp);
+ fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+
if (!fp->fi_fds[oflag]) {
spin_unlock(&fp->fi_lock);
status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
if (status)
- goto out;
+ goto out_put_access;
spin_lock(&fp->fi_lock);
if (!fp->fi_fds[oflag]) {
fp->fi_fds[oflag] = filp;
filp = NULL;
}
}
- status = nfs4_file_get_access(fp, open->op_share_access);
spin_unlock(&fp->fi_lock);
if (filp)
fput(filp);
- if (status)
- goto out_put_access;

status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
goto out_put_access;
-
- /* Set access and deny bits in stateid */
- set_access(open->op_share_access, stp);
- set_deny(open->op_share_deny, stp);
- return nfs_ok;
-
-out_put_access:
- nfs4_file_put_access(fp, open->op_share_access);
out:
return status;
+out_put_access:
+ stp->st_access_bmap = old_access_bmap;
+ nfs4_file_put_access(fp, open->op_share_access);
+ reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
+ goto out;
}

static __be32
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
{
__be32 status;
+ unsigned char old_deny_bmap;

if (!test_access(open->op_share_access, stp))
- status = nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
- else
- status = nfsd4_truncate(rqstp, cur_fh, open);
+ return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);

- if (status)
+ /* test and set deny mode */
+ spin_lock(&fp->fi_lock);
+ status = nfs4_file_check_deny(fp, open->op_share_deny);
+ if (status == nfs_ok) {
+ old_deny_bmap = stp->st_deny_bmap;
+ set_deny(open->op_share_deny, stp);
+ fp->fi_share_deny |=
+ (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+ }
+ spin_unlock(&fp->fi_lock);
+
+ if (status != nfs_ok)
return status;
- return nfs_ok;
-}

+ status = nfsd4_truncate(rqstp, cur_fh, open);
+ if (status != nfs_ok)
+ reset_union_bmap_deny(old_deny_bmap, stp);
+ return status;
+}

static void
nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
@@ -3582,7 +3658,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
fp = find_or_add_file(ino, open->op_file);
if (fp != open->op_file) {
- if ((status = nfs4_check_open(fp, open, &stp)))
+ status = nfs4_check_open(fp, open, &stp);
+ if (status)
goto out;
status = nfs4_check_deleg(cl, open, &dp);
if (status)
@@ -4269,17 +4346,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
}
}

-static void
-reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
- int i;
-
- for (i = 1; i < 4; i++) {
- if ((i & deny) != i)
- clear_deny(i, stp);
- }
-}
-
__be32
nfsd4_open_downgrade(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 72aee4b4f1ae..015b972da8ba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -391,6 +391,7 @@ struct nfs4_file {
* + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set.
*/
atomic_t fi_access[2];
+ u32 fi_share_deny;
struct file *fi_deleg_file;
struct file_lock *fi_lease;
atomic_t fi_delegees;
--
1.9.3


2014-07-10 18:07:57

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 09/11] nfsd: always hold the fi_lock when bumping fi_access refcounts

Once we remove the client_mutex, there's an unlikely but possible race
that could occur. It will be possible for nfs4_file_put_access to race
with nfs4_file_get_access. The refcount will go to zero (briefly) and
then bumped back to one. If that happens we set ourselves up for a
use-after-free and the potential for a lock to race onto the i_flock
list as a filp is being torn down.

Ensure that we can safely bump the refcount on the file by holding the
fi_lock whenever that's done. The only place it currently isn't is in
get_lock_access.

In order to ensure atomicity with finding the file, use the
find_*_file_locked variants and then call get_lock_access to get new
access references on the nfs4_file under the same lock.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5f7294712ad4..8f320f2f8b84 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -377,6 +377,8 @@ static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
static void
__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
+ lockdep_assert_held(&fp->fi_lock);
+
if (access & NFS4_SHARE_ACCESS_WRITE)
atomic_inc(&fp->fi_access[O_WRONLY]);
if (access & NFS4_SHARE_ACCESS_READ)
@@ -386,6 +388,8 @@ __nfs4_file_get_access(struct nfs4_file *fp, u32 access)
static __be32
nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
+ lockdep_assert_held(&fp->fi_lock);
+
/* Does this access mode make sense? */
if (access & ~NFS4_SHARE_ACCESS_BOTH)
return nfserr_inval;
@@ -4572,6 +4576,8 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
{
struct nfs4_file *fp = lock_stp->st_file;

+ lockdep_assert_held(&fp->fi_lock);
+
if (test_access(access, lock_stp))
return;
__nfs4_file_get_access(fp, access);
@@ -4623,6 +4629,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_openowner *open_sop = NULL;
struct nfs4_lockowner *lock_sop = NULL;
struct nfs4_ol_stateid *lock_stp;
+ struct nfs4_file *fp;
struct file *filp = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
@@ -4703,20 +4710,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}

+ fp = lock_stp->st_file;
locks_init_lock(file_lock);
switch (lock->lk_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
- filp = find_readable_file(lock_stp->st_file);
+ spin_lock(&fp->fi_lock);
+ filp = find_readable_file_locked(fp);
if (filp)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
+ spin_unlock(&fp->fi_lock);
file_lock->fl_type = F_RDLCK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
- filp = find_writeable_file(lock_stp->st_file);
+ spin_lock(&fp->fi_lock);
+ filp = find_writeable_file_locked(fp);
if (filp)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
+ spin_unlock(&fp->fi_lock);
file_lock->fl_type = F_WRLCK;
break;
default:
--
1.9.3


2014-07-10 18:07:46

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 01/11] nfsd: Add fine grained protection for the nfs4_file->fi_stateids list

From: Trond Myklebust <[email protected]>

Access to this list is currently serialized by the client_mutex. Add
finer grained locking around this list in preparation for its removal.

Signed-off-by: Trond Myklebust <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
fs/nfsd/nfs4state.c | 21 ++++++++++++++++++---
fs/nfsd/state.h | 1 +
2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3704789ca4b7..cfb10d060c83 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -708,7 +708,11 @@ release_all_access(struct nfs4_ol_stateid *stp)

static void unhash_generic_stateid(struct nfs4_ol_stateid *stp)
{
+ struct nfs4_file *fp = stp->st_file;
+
+ spin_lock(&fp->fi_lock);
list_del(&stp->st_perfile);
+ spin_unlock(&fp->fi_lock);
list_del(&stp->st_perstateowner);
}

@@ -2676,6 +2680,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
lockdep_assert_held(&state_lock);

atomic_set(&fp->fi_ref, 1);
+ spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
ihold(ino);
@@ -2799,7 +2804,6 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
- list_add(&stp->st_perfile, &fp->fi_stateids);
stp->st_stateowner = &oo->oo_owner;
get_nfs4_file(fp);
stp->st_file = fp;
@@ -2808,6 +2812,9 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
set_access(open->op_share_access, stp);
set_deny(open->op_share_deny, stp);
stp->st_openstp = NULL;
+ spin_lock(&fp->fi_lock);
+ list_add(&stp->st_perfile, &fp->fi_stateids);
+ spin_unlock(&fp->fi_lock);
}

static void
@@ -2915,6 +2922,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
return nfs_ok;
ret = nfserr_locked;
/* Search for conflicting share reservations */
+ spin_lock(&fp->fi_lock);
list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
if (test_deny(deny_type, stp) ||
test_deny(NFS4_SHARE_DENY_BOTH, stp))
@@ -2922,6 +2930,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
}
ret = nfs_ok;
out:
+ spin_unlock(&fp->fi_lock);
put_nfs4_file(fp);
return ret;
}
@@ -3150,6 +3159,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st
struct nfs4_ol_stateid *local;
struct nfs4_openowner *oo = open->op_openowner;

+ spin_lock(&fp->fi_lock);
list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
@@ -3158,9 +3168,12 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st
if (local->st_stateowner == &oo->oo_owner)
*stpp = local;
/* check for conflicting share reservations */
- if (!test_share(local, open))
+ if (!test_share(local, open)) {
+ spin_unlock(&fp->fi_lock);
return nfserr_share_denied;
+ }
}
+ spin_unlock(&fp->fi_lock);
return nfs_ok;
}

@@ -4408,7 +4421,6 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct
if (stp == NULL)
return NULL;
stp->st_stid.sc_type = NFS4_LOCK_STID;
- list_add(&stp->st_perfile, &fp->fi_stateids);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
stp->st_stateowner = &lo->lo_owner;
get_nfs4_file(fp);
@@ -4417,6 +4429,9 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
list_add(&stp->st_locks, &open_stp->st_locks);
+ spin_lock(&fp->fi_lock);
+ list_add(&stp->st_perfile, &fp->fi_stateids);
+ spin_unlock(&fp->fi_lock);
return stp;
}

diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 06d1a908a58e..04737b3ed363 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -377,6 +377,7 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */
struct nfs4_file {
atomic_t fi_ref;
+ spinlock_t fi_lock;
struct hlist_node fi_hash; /* hash by "struct inode *" */
struct list_head fi_stateids;
struct list_head fi_delegations;
--
1.9.3


2014-07-10 20:14:51

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Thu, Jul 10, 2014 at 02:07:24PM -0400, Jeff Layton wrote:
> Here is the deny mode handling overhaul broken out into a separate
> series, as requested by Christoph. I've tried to address most of
> his review comments, but I may have missed some. Let me know if
> if I did.

They look fine to me; I'll merge them soon absent any objections from
Christoph.

--b.

>
> Part of the client_mutex removal series involves cleaning up the
> handling of deny modes in the server in order to setting and
> enforcement atomic.
>
> This does add a new per-nfs4_file spinlock, but in principle it
> should never be contended while the client_mutex is still wrapped
> around all of this code.
>
> Jeff Layton (8):
> nfsd: refactor nfs4_file_get_access and nfs4_file_put_access
> nfsd: remove nfs4_file_put_fd
> nfsd: shrink st_access_bmap and st_deny_bmap
> nfsd: set stateid access and deny bits in nfs4_get_vfs_file
> nfsd: clean up reset_union_bmap_deny
> nfsd: always hold the fi_lock when bumping fi_access refcounts
> nfsd: make deny mode enforcement more efficient and close races in it
> nfsd: cleanup and rename nfs4_check_open
>
> Trond Myklebust (3):
> nfsd: Add fine grained protection for the nfs4_file->fi_stateids list
> nfsd: Add locking to the nfs4_file->fi_fds[] array
> nfsd: clean up helper __release_lock_stateid
>
> fs/nfsd/nfs4state.c | 437 ++++++++++++++++++++++++++++++++++++++--------------
> fs/nfsd/state.h | 32 +---
> 2 files changed, 325 insertions(+), 144 deletions(-)
>
> --
> 1.9.3
>

2014-07-11 17:48:26

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

On Fri, 11 Jul 2014 10:31:26 -0700
"Frank Filz" <[email protected]> wrote:

> > The current enforcement of deny modes is both inefficient and scattered
> > across several places, which makes it hard to guarantee atomicity. The
> > inefficiency is a problem now, and the lack of atomicity will mean races
> once
> > the client_mutex is removed.
> >
> > First, we address the inefficiency. We have to track deny modes on a per-
> > stateid basis to ensure that open downgrades are sane, but when the server
> > goes to enforce them it has to walk the entire list of stateids and check
> > against each one.
> >
> > Instead of doing that, maintain a per-nfs4_file deny mode. When a file is
> > opened, we simply set any deny bits in that mode that were specified in
> the
> > OPEN call. We can then use that unified deny mode to do a simple check to
> > see whether there are any conflicts without needing to walk the entire
> > stateid list.
> >
> > The only time we'll need to walk the entire list of stateids is when a
> stateid
> > that has a deny mode on it is being released, or one is having its deny
> mode
> > downgraded. In that case, we must walk the entire list and recalculate the
> > fi_share_deny field. Since deny modes are pretty rare today, this should
> be
> > very rare under normal workloads.
>
> What we do in Ganesha to avoid walking the list of stateids on release is
> maintain the effective deny (and access) mode not at bits, but as a counter
> for each bit. Thus, to remove a SHARE_ACCESS_READ | SHARE_DENY_WRITE, you
> decrement the counts for access_read and deny_write.
>
> Frank
>
>

Sure, that's another possibility that I considered, but I didn't want
to be bothered with having to add counters for deny modes. In practice
there are *no* clients that use them (aside from pynfs and maybe the
semi-mythical Windows v4.1 client).

With this scheme, deny mode enforcement is pretty darned efficient,
particularly in the common case where there are no deny modes to
enforce.

Any penalty for the use of deny modes is generally paid during the
CLOSE or OPEN_DOWNGRADE on behalf of the client that's using them.
Any RPC from a client that's not won't need to do any extra work
(aside from maybe spinning on the fi_lock while another client is
having to recalculate the fi_share_deny).

--
Jeff Layton <[email protected]>

2014-07-11 18:00:44

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 10/11] nfsd: make deny mode enforcement more efficient and close races in it

On Fri, Jul 11, 2014 at 1:56 PM, Frank Filz <[email protected]> wrote:
>> On Fri, 11 Jul 2014 10:31:26 -0700
>> "Frank Filz" <[email protected]> wrote:
>>
>> > > The current enforcement of deny modes is both inefficient and
>> > > scattered across several places, which makes it hard to guarantee
>> > > atomicity. The inefficiency is a problem now, and the lack of
>> > > atomicity will mean races
>> > once
>> > > the client_mutex is removed.
>> > >
>> > > First, we address the inefficiency. We have to track deny modes on a
>> > > per- stateid basis to ensure that open downgrades are sane, but when
>> > > the server goes to enforce them it has to walk the entire list of
>> > > stateids and check against each one.
>> > >
>> > > Instead of doing that, maintain a per-nfs4_file deny mode. When a
>> > > file is opened, we simply set any deny bits in that mode that were
>> > > specified in
>> > the
>> > > OPEN call. We can then use that unified deny mode to do a simple
>> > > check to see whether there are any conflicts without needing to walk
>> > > the entire stateid list.
>> > >
>> > > The only time we'll need to walk the entire list of stateids is when
>> > > a
>> > stateid
>> > > that has a deny mode on it is being released, or one is having its
>> > > deny
>> > mode
>> > > downgraded. In that case, we must walk the entire list and
>> > > recalculate the fi_share_deny field. Since deny modes are pretty
>> > > rare today, this should
>> > be
>> > > very rare under normal workloads.
>> >
>> > What we do in Ganesha to avoid walking the list of stateids on release
>> > is maintain the effective deny (and access) mode not at bits, but as a
>> > counter for each bit. Thus, to remove a SHARE_ACCESS_READ |
>> > SHARE_DENY_WRITE, you decrement the counts for access_read and
>> deny_write.
>> >
>> > Frank
>> >
>> >
>>
>> Sure, that's another possibility that I considered, but I didn't want to
> be
>> bothered with having to add counters for deny modes. In practice there are
>> *no* clients that use them (aside from pynfs and maybe the semi-mythical
>> Windows v4.1 client).

You don't need counters for deny modes. There can only be 1 of each,
since any deny mode has to be part of an OPEN that has at least one
non-zero share access mode. So a single bit for each should be fine.

>>
>> With this scheme, deny mode enforcement is pretty darned efficient,
>> particularly in the common case where there are no deny modes to enforce.
>>
>> Any penalty for the use of deny modes is generally paid during the CLOSE
> or
>> OPEN_DOWNGRADE on behalf of the client that's using them.
>> Any RPC from a client that's not won't need to do any extra work (aside
> from
>> maybe spinning on the fi_lock while another client is having to
> recalculate the
>> fi_share_deny).
>
> Good point.
>
> Whatever happened to Pavel Shilovsky's O_DENY patch set? I was looking
> forward to that for allowing Ganesha and Samba share reservations to more
> fully interact with each other...
>
> Frank
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html



--
Trond Myklebust

Linux NFS client maintainer, PrimaryData

[email protected]

2014-07-13 11:42:48

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 00/11] nfsd: deny mode handling overhaul

On Fri, Jul 11, 2014 at 11:42:27AM -0400, Jeff Layton wrote:
> Yes, thanks to both of you. The review and merging are much
> appreciated. Only 89 patches to go once these go in!

Might help if we can get them into slighly smaller reviewable chunks,
e.g. the next batch might be the stateid refcounting and everything
around it, deferring the various client and session related bits and the
fault injection changes for now.

2014-07-10 18:08:01

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 11/11] nfsd: cleanup and rename nfs4_check_open

Rename it to better describe what it does, and have it just return the
stateid instead of a __be32 (which is now always nfs_ok). Also, do the
search for an existing stateid after the delegation check, to reduce
cleanup if the delegation check returns error.

Signed-off-by: Jeff Layton <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
fs/nfsd/nfs4state.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index da88b31c0afe..225f98c7d00d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3297,10 +3297,10 @@ out:
return nfs_ok;
}

-static __be32
-nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp)
+static struct nfs4_ol_stateid *
+nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
{
- struct nfs4_ol_stateid *local;
+ struct nfs4_ol_stateid *local, *ret = NULL;
struct nfs4_openowner *oo = open->op_openowner;

spin_lock(&fp->fi_lock);
@@ -3308,14 +3308,13 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
continue;
- /* remember if we have seen this open owner */
if (local->st_stateowner == &oo->oo_owner) {
- *stpp = local;
+ ret = local;
break;
}
}
spin_unlock(&fp->fi_lock);
- return nfs_ok;
+ return ret;
}

static inline int nfs4_access_to_access(u32 nfs4_access)
@@ -3658,12 +3657,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
fp = find_or_add_file(ino, open->op_file);
if (fp != open->op_file) {
- status = nfs4_check_open(fp, open, &stp);
- if (status)
- goto out;
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
+ stp = nfsd4_find_existing_open(fp, open);
} else {
open->op_file = NULL;
status = nfserr_bad_stateid;
--
1.9.3


2014-07-10 18:07:50

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 04/11] nfsd: refactor nfs4_file_get_access and nfs4_file_put_access

Have them take NFS4_SHARE_ACCESS_* flags instead of an open mode. This
spares the callers from having to convert it themselves.

This also allows us to simplify these functions as we no longer need
to do the access_to_omode conversion in either one.

Note too that this patch eliminates the WARN_ON in
__nfs4_file_get_access. It's valid for now, but in a later patch we'll
be bumping the refcounts prior to opening the file in order to close
some races, at which point we'll need to remove it anyway.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 48 +++++++++++++++++++++++++++---------------------
1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4ab567e7db0f..a19257f91f25 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -374,19 +374,24 @@ static unsigned int file_hashval(struct inode *ino)

static struct hlist_head file_hashtbl[FILE_HASH_SIZE];

-static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
+static void
+__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
- WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
- atomic_inc(&fp->fi_access[oflag]);
+ if (access & NFS4_SHARE_ACCESS_WRITE)
+ atomic_inc(&fp->fi_access[O_WRONLY]);
+ if (access & NFS4_SHARE_ACCESS_READ)
+ atomic_inc(&fp->fi_access[O_RDONLY]);
}

-static void nfs4_file_get_access(struct nfs4_file *fp, int oflag)
+static __be32
+nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
- if (oflag == O_RDWR) {
- __nfs4_file_get_access(fp, O_RDONLY);
- __nfs4_file_get_access(fp, O_WRONLY);
- } else
- __nfs4_file_get_access(fp, oflag);
+ /* Does this access mode make sense? */
+ if (access & ~NFS4_SHARE_ACCESS_BOTH)
+ return nfserr_inval;
+
+ __nfs4_file_get_access(fp, access);
+ return nfs_ok;
}

static struct file *nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
@@ -417,13 +422,14 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
}
}

-static void nfs4_file_put_access(struct nfs4_file *fp, int oflag)
+static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
{
- if (oflag == O_RDWR) {
- __nfs4_file_put_access(fp, O_RDONLY);
+ WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH);
+
+ if (access & NFS4_SHARE_ACCESS_WRITE)
__nfs4_file_put_access(fp, O_WRONLY);
- } else
- __nfs4_file_put_access(fp, oflag);
+ if (access & NFS4_SHARE_ACCESS_READ)
+ __nfs4_file_put_access(fp, O_RDONLY);
}

static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
@@ -784,8 +790,7 @@ release_all_access(struct nfs4_ol_stateid *stp)

for (i = 1; i < 4; i++) {
if (test_access(i, stp))
- nfs4_file_put_access(stp->st_file,
- nfs4_access_to_omode(i));
+ nfs4_file_put_access(stp->st_file, i);
clear_access(i, stp);
}
}
@@ -3307,10 +3312,12 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
filp = NULL;
}
}
- nfs4_file_get_access(fp, oflag);
+ status = nfs4_file_get_access(fp, open->op_share_access);
spin_unlock(&fp->fi_lock);
if (filp)
fput(filp);
+ if (status)
+ goto out_put_access;

status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
@@ -3319,7 +3326,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
return nfs_ok;

out_put_access:
- nfs4_file_put_access(fp, oflag);
+ nfs4_file_put_access(fp, open->op_share_access);
out:
return status;
}
@@ -4228,7 +4235,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a
{
if (!test_access(access, stp))
return;
- nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access));
+ nfs4_file_put_access(stp->st_file, access);
clear_access(access, stp);
}

@@ -4555,11 +4562,10 @@ check_lock_length(u64 offset, u64 length)
static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
{
struct nfs4_file *fp = lock_stp->st_file;
- int oflag = nfs4_access_to_omode(access);

if (test_access(access, lock_stp))
return;
- nfs4_file_get_access(fp, oflag);
+ __nfs4_file_get_access(fp, access);
set_access(access, lock_stp);
}

--
1.9.3


2014-07-10 18:07:54

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 07/11] nfsd: set stateid access and deny bits in nfs4_get_vfs_file

Cleanup -- ensure that the stateid bits are set at the same time that
the file access refcounts are incremented. Keeping them coherent like
this makes it easier to ensure that we account for all of the
references.

Since the initialization of the st_*_bmap fields is done when it's
hashed, we go ahead and hash the stateid before getting access to the
file and unhash it if that function returns error. This will be
necessary anyway in a follow-on patch that will overhaul deny mode
handling.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f7f11631c26c..0a54fc956463 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3300,7 +3300,8 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
}

static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
- struct svc_fh *cur_fh, struct nfsd4_open *open)
+ struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
+ struct nfsd4_open *open)
{
struct file *filp = NULL;
__be32 status;
@@ -3330,6 +3331,9 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
if (status)
goto out_put_access;

+ /* Set access and deny bits in stateid */
+ set_access(open->op_share_access, stp);
+ set_deny(open->op_share_deny, stp);
return nfs_ok;

out_put_access:
@@ -3341,20 +3345,15 @@ out:
static __be32
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
{
- u32 op_share_access = open->op_share_access;
__be32 status;

- if (!test_access(op_share_access, stp))
- status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open);
+ if (!test_access(open->op_share_access, stp))
+ status = nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
else
status = nfsd4_truncate(rqstp, cur_fh, open);

if (status)
return status;
-
- /* remember the open */
- set_access(op_share_access, stp);
- set_deny(open->op_share_deny, stp);
return nfs_ok;
}

@@ -3602,12 +3601,14 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (status)
goto out;
} else {
- status = nfs4_get_vfs_file(rqstp, fp, current_fh, open);
- if (status)
- goto out;
stp = open->op_stp;
open->op_stp = NULL;
init_open_stateid(stp, fp, open);
+ status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
+ if (status) {
+ release_open_stateid(stp);
+ goto out;
+ }
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
--
1.9.3


2014-07-10 18:07:51

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 03/11] nfsd: clean up helper __release_lock_stateid

From: Trond Myklebust <[email protected]>

Use filp_close instead of open coding. filp_close does a bit more than
just release the locks and put the filp. It also calls ->flush and
dnotify_flush, both of which should be done here anyway.

Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: Jeff Layton <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
---
fs/nfsd/nfs4state.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 314dc8061461..4ab567e7db0f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -821,10 +821,8 @@ static void __release_lock_stateid(struct nfs4_ol_stateid *stp)
unhash_generic_stateid(stp);
unhash_stid(&stp->st_stid);
file = find_any_file(stp->st_file);
- if (file) {
- locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
- fput(file);
- }
+ if (file)
+ filp_close(file, (fl_owner_t)lockowner(stp->st_stateowner));
close_generic_stateid(stp);
free_generic_stateid(stp);
}
--
1.9.3


2014-07-10 18:07:52

by Jeff Layton

[permalink] [raw]
Subject: [PATCH 05/11] nfsd: remove nfs4_file_put_fd

...and replace it with a simple swap call.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a19257f91f25..c02bad6d7e90 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -394,15 +394,6 @@ nfs4_file_get_access(struct nfs4_file *fp, u32 access)
return nfs_ok;
}

-static struct file *nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
-{
- struct file *filp;
-
- filp = fp->fi_fds[oflag];
- fp->fi_fds[oflag] = NULL;
- return filp;
-}
-
static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
{
might_lock(&fp->fi_lock);
@@ -411,9 +402,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
struct file *f1 = NULL;
struct file *f2 = NULL;

- f1 = nfs4_file_put_fd(fp, oflag);
+ swap(f1, fp->fi_fds[oflag]);
if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
- f2 = nfs4_file_put_fd(fp, O_RDWR);
+ swap(f2, fp->fi_fds[O_RDWR]);
spin_unlock(&fp->fi_lock);
if (f1)
fput(f1);
--
1.9.3