2023-11-01 01:01:10

by NeilBrown

[permalink] [raw]
Subject: [PATCH 0/6 v2] support admin-revocation of v4 state

This set fixes two issues found by kernel test robot
1/ nfsd4_revoke_states() needs to be defined when not CONFIG_NFSD_V4
2/ nfs40_last_revoke is larger than a mechine word in 32 bit configs,
so needs protection - use nn->client_lock which is already taken when
nfs40_last_revoke is accessed.

NeilBrown




2023-11-01 01:01:23

by NeilBrown

[permalink] [raw]
Subject: [PATCH 2/6] nfsd: allow admin-revoked state to appear in /proc/fs/nfsd/clients/*/states

Change the "show" functions to show some content even if a file cannot
be found, and call them for the ADMIN_REVOKED versions of state id.
This use primarily useful for debugging - to ensure states are being
removed eventually.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfsd/nfs4state.c | 78 ++++++++++++++++++++++-----------------------
1 file changed, 38 insertions(+), 40 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f3ba53a16105..e15d35c57991 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2679,17 +2679,10 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
struct nfs4_stateowner *oo;
unsigned int access, deny;

- if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
- return 0; /* XXX: or SEQ_SKIP? */
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;

- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (!file)
- goto out;
-
seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
seq_printf(s, ": { type: open, ");
@@ -2704,14 +2697,17 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");

- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, ", ");
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
+ spin_unlock(&nf->fi_lock);
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
-out:
- spin_unlock(&nf->fi_lock);
return 0;
}

@@ -2725,30 +2721,29 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (!file)
- goto out;

seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
seq_printf(s, ": { type: lock, ");

- /*
- * Note: a lock stateid isn't really the same thing as a lock,
- * it's the locking state held by one owner on a file, and there
- * may be multiple (or no) lock ranges associated with it.
- * (Same for the matter is true of open stateids.)
- */
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ /*
+ * Note: a lock stateid isn't really the same thing as a lock,
+ * it's the locking state held by one owner on a file, and there
+ * may be multiple (or no) lock ranges associated with it.
+ * (Same for the matter is true of open stateids.)
+ */

- nfs4_show_superblock(s, file);
- /* XXX: open stateid? */
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, ", ");
+ nfs4_show_superblock(s, file);
+ /* XXX: open stateid? */
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
nfs4_show_owner(s, oo);
seq_printf(s, " }\n");
-out:
spin_unlock(&nf->fi_lock);
return 0;
}
@@ -2761,27 +2756,27 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)

ds = delegstateid(st);
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = nf->fi_deleg_file;
- if (!file)
- goto out;

seq_printf(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
seq_printf(s, ": { type: deleg, ");

/* Kinda dead code as long as we only support read delegs: */
- seq_printf(s, "access: %s, ",
- ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+ seq_printf(s, "access: %s",
+ ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");

/* XXX: lease time, whether it's being recalled. */

- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, " }\n");
-out:
+ spin_lock(&nf->fi_lock);
+ file = nf->fi_deleg_file;
+ if (file) {
+ seq_puts(s, ", ");
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ }
spin_unlock(&nf->fi_lock);
+ seq_puts(s, " }\n");
return 0;
}

@@ -2813,10 +2808,13 @@ static int states_show(struct seq_file *s, void *v)

switch (st->sc_type) {
case NFS4_OPEN_STID:
+ case NFS4_ADMIN_REVOKED_STID:
return nfs4_show_open(s, st);
case NFS4_LOCK_STID:
+ case NFS4_ADMIN_REVOKED_LOCK_STID:
return nfs4_show_lock(s, st);
case NFS4_DELEG_STID:
+ case NFS4_ADMIN_REVOKED_DELEG_STID:
return nfs4_show_deleg(s, st);
case NFS4_LAYOUT_STID:
return nfs4_show_layout(s, st);
--
2.42.0

2023-11-01 01:01:26

by NeilBrown

[permalink] [raw]
Subject: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

The NFSv4 protocol allows state to be revoked by the admin and has error
codes which allow this to be communicated to the client.

This patch
- introduces 3 new state-id types for revoked open, lock, and
delegation state. This requires the bitmask to be 'short',
not 'char'
- reports NFS4ERR_ADMIN_REVOKED when these are accessed
- introduces a per-client counter of these states and returns
SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
Decrement this when freeing any admin-revoked state.
- introduces stub code to find all interesting states for a given
superblock so they can be revoked via the 'unlock_filesystem'
file in /proc/fs/nfsd/
No actual states are handled yet.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfsd/nfs4layouts.c | 2 +-
fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
fs/nfsd/nfsctl.c | 1 +
fs/nfsd/nfsd.h | 1 +
fs/nfsd/state.h | 35 +++++++++++-----
fs/nfsd/trace.h | 8 +++-
6 files changed, 120 insertions(+), 20 deletions(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 5e8096bc5eaa..09d0363bfbc4 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
{
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stid;
- unsigned char typemask = NFS4_LAYOUT_STID;
+ unsigned short typemask = NFS4_LAYOUT_STID;
__be32 status;

if (create)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 65fd5510323a..f3ba53a16105 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
return NULL;
}

+void nfs4_unhash_stid(struct nfs4_stid *s)
+{
+ if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
+ atomic_dec(&s->sc_client->cl_admin_revoked);
+ s->sc_type = 0;
+}
+
void
nfs4_put_stid(struct nfs4_stid *s)
{
@@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
return;
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ nfs4_unhash_stid(s);
nfs4_free_cpntf_statelist(clp->net, s);
spin_unlock(&clp->cl_lock);
s->sc_free(s);
@@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
nfs4_put_stid(&dp->dl_stid);
}

-void nfs4_unhash_stid(struct nfs4_stid *s)
-{
- s->sc_type = 0;
-}
-
/**
* nfs4_delegation_exists - Discover if this delegation already exists
* @clp: a pointer to the nfs4_client we're granting a delegation to
@@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
}

idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ nfs4_unhash_stid(s);
list_add(&stp->st_locks, reaplist);
}

@@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
nfs4_put_stateowner(&oo->oo_owner);
}

+static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
+ struct super_block *sb,
+ unsigned short sc_types)
+{
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if ((stid->sc_type & sc_types) &&
+ stid->sc_file->fi_inode->i_sb == sb) {
+ refcount_inc(&stid->sc_count);
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+ return stid;
+}
+
+void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unsigned int idhashval;
+ unsigned short sc_types;
+
+ sc_types = 0;
+
+ spin_lock(&nn->client_lock);
+ for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
+ struct list_head *head = &nn->conf_id_hashtbl[idhashval];
+ struct nfs4_client *clp;
+ retry:
+ list_for_each_entry(clp, head, cl_idhash) {
+ struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
+ sc_types);
+ if (stid) {
+ spin_unlock(&nn->client_lock);
+ switch (stid->sc_type) {
+ }
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ goto retry;
+ }
+ }
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static inline int
hash_sessionid(struct nfs4_sessionid *sessionid)
{
@@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
}

static struct nfs4_stid *
-find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
+ unsigned short typemask)
{
struct nfs4_stid *s;

@@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
}
seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
+ seq_printf(m, "admin-revoked states: %d\n",
+ atomic_read(&clp->cl_admin_revoked));
drop_client(clp);

return 0;
@@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
if (!list_empty(&clp->cl_revoked))
seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+ if (atomic_read(&clp->cl_admin_revoked))
+ seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
out_no_session:
if (conn)
free_conn(conn);
@@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
status = nfserr_deleg_revoked;
goto out;
}
+ if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
+ nfs4_put_stid(&deleg->dl_stid);
+ status = nfserr_admin_revoked;
+ goto out;
+ }
flags = share_access_to_flags(open->op_share_access);
status = nfs4_check_delegmode(deleg, flags);
if (status) {
@@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
case NFS4_REVOKED_DELEG_STID:
status = nfserr_deleg_revoked;
break;
+ case NFS4_ADMIN_REVOKED_STID:
+ case NFS4_ADMIN_REVOKED_LOCK_STID:
+ case NFS4_ADMIN_REVOKED_DELEG_STID:
+ status = nfserr_admin_revoked;
+ break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
status = nfsd4_check_openowner_confirmed(openlockstateid(s));
@@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)

__be32
nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
+ stateid_t *stateid, unsigned short typemask,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
@@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
else if (typemask & NFS4_DELEG_STID)
typemask |= NFS4_REVOKED_DELEG_STID;

+ if (typemask & NFS4_OPEN_STID)
+ typemask |= NFS4_ADMIN_REVOKED_STID;
+ if (typemask & NFS4_LOCK_STID)
+ typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
+ if (typemask & NFS4_DELEG_STID)
+ typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
+
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
return nfserr_bad_stateid;
@@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return nfserr_deleg_revoked;
return nfserr_bad_stateid;
}
+ if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
+ nfs4_put_stid(stid);
+ return nfserr_admin_revoked;
+ }
*s = stid;
return nfs_ok;
}
@@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
*/
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
- stateid_t *stateid, char typemask,
+ stateid_t *stateid, unsigned short typemask,
struct nfs4_ol_stateid **stpp,
struct nfsd_net *nn)
{
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 739ed5bf71cd..50368eec86b0 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
* 3. Is that directory the root of an exported file system?
*/
error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
+ nfsd4_revoke_states(netns(file), path.dentry->d_sb);

path_put(&path);
return error;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index f5ff42f41ee7..d46203eac3c8 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
+#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index f96eaa8e9413..3af5ab55c978 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
*/
struct nfs4_stid {
refcount_t sc_count;
-#define NFS4_OPEN_STID 1
-#define NFS4_LOCK_STID 2
-#define NFS4_DELEG_STID 4
+ struct list_head sc_cp_list;
+ unsigned short sc_type;
+#define NFS4_OPEN_STID BIT(0)
+#define NFS4_LOCK_STID BIT(1)
+#define NFS4_DELEG_STID BIT(2)
/* For an open stateid kept around *only* to process close replays: */
-#define NFS4_CLOSED_STID 8
+#define NFS4_CLOSED_STID BIT(3)
/* For a deleg stateid kept around only to process free_stateid's: */
-#define NFS4_REVOKED_DELEG_STID 16
-#define NFS4_CLOSED_DELEG_STID 32
-#define NFS4_LAYOUT_STID 64
- struct list_head sc_cp_list;
- unsigned char sc_type;
+#define NFS4_REVOKED_DELEG_STID BIT(4)
+#define NFS4_CLOSED_DELEG_STID BIT(5)
+#define NFS4_LAYOUT_STID BIT(6)
+#define NFS4_ADMIN_REVOKED_STID BIT(7)
+#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
+#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
+#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
+ NFS4_ADMIN_REVOKED_LOCK_STID | \
+ NFS4_ADMIN_REVOKED_DELEG_STID)
stateid_t sc_stateid;
spinlock_t sc_lock;
struct nfs4_client *sc_client;
@@ -372,6 +378,7 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ atomic_t cl_admin_revoked; /* count of admin-revoked states */
/* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name;
@@ -694,7 +701,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **filp,
struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
+ stateid_t *stateid, unsigned short typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
@@ -736,6 +743,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
}
struct nfsd_file *find_any_file(struct nfs4_file *f);

+#ifdef CONFIG_NFSD_V4
+void nfsd4_revoke_states(struct net *net, struct super_block *sb);
+#else
+static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+}
+#endif
+
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index fbc0ccb40424..e359d531402c 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -648,6 +648,9 @@ TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
+TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_STID);
+TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_LOCK_STID);
+TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_DELEG_STID);

#define show_stid_type(x) \
__print_flags(x, "|", \
@@ -657,7 +660,10 @@ TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
{ NFS4_CLOSED_STID, "CLOSED" }, \
{ NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
{ NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
- { NFS4_LAYOUT_STID, "LAYOUT" })
+ { NFS4_LAYOUT_STID, "LAYOUT" }, \
+ { NFS4_ADMIN_REVOKED_STID, "ADMIN_REVOKED" }, \
+ { NFS4_ADMIN_REVOKED_LOCK_STID, "ADMIN_REVOKED_LOCK" }, \
+ { NFS4_ADMIN_REVOKED_DELEG_STID,"ADMIN_REVOKED_DELEG" })

DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_PROTO(
--
2.42.0

2023-11-01 01:01:32

by NeilBrown

[permalink] [raw]
Subject: [PATCH 3/6] nfsd: allow admin-revoked NFSv4.0 state to be freed.

For NFSv4.1 and later the client easily discovers if there is any
admin-revoked state and will then find and explicitly free it.

For NFSv4.0 there is no such mechanism. The client can only find that
state is admin-revoked if it tries to use that state, and there is no
way for it to explicitly free the state. So the server must hold on to,
at least, the stateid for an indefinite amount of time. A
RELEASE_LOCKOWNER request might justify forgetting some of these
stateids, as would the whole clients lease lapsing, but these are not
reliable.

This patch takes two approaches.

Whenever a client uses an revoked stateid, that stateid is then
discarded and will not be recognised again. This might confuse a client
which expect to get NFS4ERR_ADMIN_REVOKED consistently once it get it at
all, but should mostly work. Hopefully one error will lead to other
resources being closed (e.g. process exits), which will result in more
stateid being freed when a CLOSE attempt gets NFS4ERR_ADMIN_REVOKED.

Also, any admin-revoked stateids that have been that way for more than
one lease time are periodically revoke.

No actual freeing of state happens in this patch. That will come in
future patches which handle the different sorts of revoked state.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfsd/netns.h | 4 ++
fs/nfsd/nfs4state.c | 98 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 102 insertions(+)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index ec49b200b797..02f8fa095b0f 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -197,6 +197,10 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
struct work_struct nfsd_shrinker_work;
+
+ /* last time an admin-revoke happened for NFSv4.0 */
+ time64_t nfs40_last_revoke;
+
};

/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e15d35c57991..13484a9cef21 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1724,6 +1724,14 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
}
nfs4_put_stid(stid);
spin_lock(&nn->client_lock);
+ if (clp->cl_minorversion == 0)
+ /* Allow cleanup after a lease period.
+ * store_release ensures cleanup will
+ * see any newly revoked states if it
+ * sees the time updated.
+ */
+ nn->nfs40_last_revoke =
+ ktime_get_boottime_seconds();
goto retry;
}
}
@@ -4645,6 +4653,39 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
return ret;
}

+static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
+{
+ struct nfs4_client *cl = s->sc_client;
+
+ switch (s->sc_type) {
+ default:
+ spin_unlock(&cl->cl_lock);
+ }
+}
+
+static void nfs40_drop_revoked_stid(struct nfs4_client *cl,
+ stateid_t *stid)
+{
+ /* NFSv4.0 has no way for the client to tell the server
+ * that it can forget an admin-revoked stateid.
+ * So we keep it around until the first time that the
+ * client uses it, and drop it the first time
+ * nfserr_admin_revoked is returned.
+ * For v4.1 and later we wait until explicitly told
+ * to free the stateid.
+ */
+ if (cl->cl_minorversion == 0) {
+ struct nfs4_stid *st;
+
+ spin_lock(&cl->cl_lock);
+ st = find_stateid_locked(cl, stid);
+ if (st)
+ nfsd_drop_revoked_stid(st);
+ else
+ spin_unlock(&cl->cl_lock);
+ }
+}
+
static __be32
nfsd4_verify_open_stid(struct nfs4_stid *s)
{
@@ -4672,6 +4713,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)

mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
ret = nfsd4_verify_open_stid(&stp->st_stid);
+ if (ret == nfserr_admin_revoked)
+ nfs40_drop_revoked_stid(stp->st_stid.sc_client,
+ &stp->st_stid.sc_stateid);
+
if (ret != nfs_ok)
mutex_unlock(&stp->st_mutex);
return ret;
@@ -5256,6 +5301,7 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
}
if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
nfs4_put_stid(&deleg->dl_stid);
+ nfs40_drop_revoked_stid(cl, &open->op_delegate_stateid);
status = nfserr_admin_revoked;
goto out;
}
@@ -6253,6 +6299,43 @@ nfs4_process_client_reaplist(struct list_head *reaplist)
}
}

+static void nfs40_clean_admin_revoked(struct nfsd_net *nn,
+ struct laundry_time *lt)
+{
+ struct nfs4_client *clp;
+
+ spin_lock(&nn->client_lock);
+ if (nn->nfs40_last_revoke == 0 ||
+ nn->nfs40_last_revoke > lt->cutoff) {
+ spin_unlock(&nn->client_lock);
+ return;
+ }
+ nn->nfs40_last_revoke = 0;
+
+retry:
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ if (atomic_read(&clp->cl_admin_revoked) == 0)
+ continue;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
+ refcount_inc(&stid->sc_count);
+ spin_unlock(&nn->client_lock);
+ /* this function drops ->cl_lock */
+ nfsd_drop_revoked_stid(stid);
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ goto retry;
+ }
+ spin_unlock(&clp->cl_lock);
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
@@ -6286,6 +6369,8 @@ nfs4_laundromat(struct nfsd_net *nn)
nfs4_get_client_reaplist(nn, &reaplist, &lt);
nfs4_process_client_reaplist(&reaplist);

+ nfs40_clean_admin_revoked(nn, &lt);
+
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
@@ -6504,6 +6589,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti
if (ret == nfs_ok)
ret = check_stateid_generation(in, &s->sc_stateid, has_session);
spin_unlock(&s->sc_lock);
+ if (ret == nfserr_admin_revoked)
+ nfs40_drop_revoked_stid(s->sc_client,
+ &s->sc_stateid);
return ret;
}

@@ -6555,6 +6643,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
}
out_unlock:
spin_unlock(&cl->cl_lock);
+ if (status == nfserr_admin_revoked)
+ nfs40_drop_revoked_stid(cl, stateid);
return status;
}

@@ -6604,6 +6694,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return nfserr_bad_stateid;
}
if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
+ nfs40_drop_revoked_stid(cstate->clp, stateid);
nfs4_put_stid(stid);
return nfserr_admin_revoked;
}
@@ -6923,6 +7014,13 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_put_stid(s);
ret = nfs_ok;
goto out;
+ case NFS4_ADMIN_REVOKED_STID:
+ case NFS4_ADMIN_REVOKED_LOCK_STID:
+ case NFS4_ADMIN_REVOKED_DELEG_STID:
+ spin_unlock(&s->sc_lock);
+ nfsd_drop_revoked_stid(s);
+ ret = nfs_ok;
+ goto out;
/* Default falls through and returns nfserr_bad_stateid */
}
spin_unlock(&s->sc_lock);
--
2.42.0

2023-11-01 01:01:36

by NeilBrown

[permalink] [raw]
Subject: [PATCH 4/6] nfsd: allow lock state ids to be revoked and then freed

Revoking state through 'unlock_filesystem' now revokes any lock states
found. When the stateids are then freed by the client, the revoked
stateids will be cleaned up correctly.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++++++++++++-
1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 13484a9cef21..c2e064f6c7c4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1708,7 +1708,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
unsigned int idhashval;
unsigned short sc_types;

- sc_types = 0;
+ sc_types = NFS4_LOCK_STID;

spin_lock(&nn->client_lock);
for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
@@ -1719,8 +1719,33 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
sc_types);
if (stid) {
+ struct nfs4_ol_stateid *stp;
+
spin_unlock(&nn->client_lock);
switch (stid->sc_type) {
+ case NFS4_LOCK_STID:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ LOCK_STATEID_MUTEX);
+ if (stid->sc_type == NFS4_LOCK_STID) {
+ struct nfs4_lockowner *lo =
+ lockowner(stp->st_stateowner);
+ struct nfsd_file *nf;
+
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file,
+ (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
+ release_all_access(stp);
+ stid->sc_type =
+ NFS4_ADMIN_REVOKED_LOCK_STID;
+ atomic_inc(&clp->cl_admin_revoked);
+ }
+ mutex_unlock(&stp->st_mutex);
+ break;
}
nfs4_put_stid(stid);
spin_lock(&nn->client_lock);
@@ -4656,8 +4681,17 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
{
struct nfs4_client *cl = s->sc_client;
+ struct nfs4_ol_stateid *stp;
+ bool unhashed;

switch (s->sc_type) {
+ case NFS4_ADMIN_REVOKED_LOCK_STID:
+ stp = openlockstateid(s);
+ unhashed = unhash_lock_stateid(stp);
+ spin_unlock(&cl->cl_lock);
+ if (unhashed)
+ nfs4_put_stid(s);
+ break;
default:
spin_unlock(&cl->cl_lock);
}
--
2.42.0

2023-11-01 01:01:54

by NeilBrown

[permalink] [raw]
Subject: [PATCH 5/6] nfsd: allow open state ids to be revoked and then freed

Revoking state through 'unlock_filesystem' now revokes any open states
found. When the stateids are then freed by the client, the revoked
stateids will be cleaned up correctly.

Possibly the related lock states should be revoked too, but a
subsequent patch will do that for all lock state on the superblock.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfsd/nfs4state.c | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c2e064f6c7c4..ed879f68944b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1708,7 +1708,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
unsigned int idhashval;
unsigned short sc_types;

- sc_types = NFS4_LOCK_STID;
+ sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID;

spin_lock(&nn->client_lock);
for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
@@ -1723,6 +1723,18 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)

spin_unlock(&nn->client_lock);
switch (stid->sc_type) {
+ case NFS4_OPEN_STID:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ OPEN_STATEID_MUTEX);
+ if (stid->sc_type == NFS4_OPEN_STID) {
+ release_all_access(stp);
+ stid->sc_type =
+ NFS4_ADMIN_REVOKED_STID;
+ atomic_inc(&clp->cl_admin_revoked);
+ }
+ mutex_unlock(&stp->st_mutex);
+ break;
case NFS4_LOCK_STID:
stp = openlockstateid(stid);
mutex_lock_nested(&stp->st_mutex,
@@ -4681,10 +4693,18 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
{
struct nfs4_client *cl = s->sc_client;
+ LIST_HEAD(reaplist);
struct nfs4_ol_stateid *stp;
bool unhashed;

switch (s->sc_type) {
+ case NFS4_ADMIN_REVOKED_STID:
+ stp = openlockstateid(s);
+ if (unhash_open_stateid(stp, &reaplist))
+ put_ol_stateid_locked(stp, &reaplist);
+ spin_unlock(&cl->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
+ break;
case NFS4_ADMIN_REVOKED_LOCK_STID:
stp = openlockstateid(s);
unhashed = unhash_lock_stateid(stp);
--
2.42.0

2023-11-01 01:02:05

by NeilBrown

[permalink] [raw]
Subject: [PATCH 6/6] nfsd: allow delegation state ids to be revoked and then freed

Revoking state through 'unlock_filesystem' now revokes any delegation
states found. When the stateids are then freed by the client, the
revoked stateids will be cleaned up correctly.

As there is already support for revoking delegations, we build on that
for admin-revoking.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++++++-------
1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ed879f68944b..9fdbdc64262d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1367,7 +1367,8 @@ static void destroy_delegation(struct nfs4_delegation *dp)
destroy_unhashed_deleg(dp);
}

-static void revoke_delegation(struct nfs4_delegation *dp)
+static void revoke_delegation(struct nfs4_delegation *dp,
+ unsigned short sc_type)
{
struct nfs4_client *clp = dp->dl_stid.sc_client;

@@ -1375,9 +1376,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)

trace_nfsd_stid_revoke(&dp->dl_stid);

- if (clp->cl_minorversion) {
+ if (clp->cl_minorversion || sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
spin_lock(&clp->cl_lock);
- dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+ dp->dl_stid.sc_type = sc_type;
refcount_inc(&dp->dl_stid.sc_count);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock);
@@ -1708,7 +1709,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
unsigned int idhashval;
unsigned short sc_types;

- sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID;
+ sc_types = NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID;

spin_lock(&nn->client_lock);
for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
@@ -1720,6 +1721,7 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
sc_types);
if (stid) {
struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;

spin_unlock(&nn->client_lock);
switch (stid->sc_type) {
@@ -1758,6 +1760,18 @@ void nfsd4_revoke_states(struct net *net, struct super_block *sb)
}
mutex_unlock(&stp->st_mutex);
break;
+ case NFS4_DELEG_STID:
+ dp = delegstateid(stid);
+ spin_lock(&state_lock);
+ if (!unhash_delegation_locked(dp))
+ dp = NULL;
+ spin_unlock(&state_lock);
+ if (dp) {
+ list_del_init(&dp->dl_recall_lru);
+ revoke_delegation(
+ dp, NFS4_ADMIN_REVOKED_DELEG_STID);
+ }
+ break;
}
nfs4_put_stid(stid);
spin_lock(&nn->client_lock);
@@ -4695,6 +4709,7 @@ static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
struct nfs4_client *cl = s->sc_client;
LIST_HEAD(reaplist);
struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;
bool unhashed;

switch (s->sc_type) {
@@ -4712,6 +4727,12 @@ static void nfsd_drop_revoked_stid(struct nfs4_stid *s)
if (unhashed)
nfs4_put_stid(s);
break;
+ case NFS4_ADMIN_REVOKED_DELEG_STID:
+ dp = delegstateid(s);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ break;
default:
spin_unlock(&cl->cl_lock);
}
@@ -5073,8 +5094,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);

if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
- dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
- return 1;
+ dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID ||
+ dp->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID)
+ return 1;

switch (task->tk_status) {
case 0:
@@ -6438,7 +6460,7 @@ nfs4_laundromat(struct nfsd_net *nn)
dp = list_first_entry(&reaplist, struct nfs4_delegation,
dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
- revoke_delegation(dp);
+ revoke_delegation(dp, NFS4_REVOKED_DELEG_STID);
}

spin_lock(&nn->client_lock);
--
2.42.0

2023-11-01 02:10:56

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 6/6] nfsd: allow delegation state ids to be revoked and then freed

Howdy Neil-

> On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
>
> Revoking state through 'unlock_filesystem' now revokes any delegation
> states found. When the stateids are then freed by the client, the
> revoked stateids will be cleaned up correctly.

Here's my derpy question of the day.

"When the stateids are then freed by the client" seems to be
a repeating trope, and it concerns me a bit (probably because
I haven't yet learned how this mechanism /currently/ works)...

In the case when the client has actually vanished (eg, was
destroyed by an orchestrator), it's not going to be around
to actively free revoked state. Doesn't that situation result
in pinned state on the server? I would expect that's a primary
use case for "unlock_filesystem."

Maybe I've misunderstood something fundamental.


--
Chuck Lever


2023-11-01 02:15:01

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state



> On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
>
> The NFSv4 protocol allows state to be revoked by the admin and has error
> codes which allow this to be communicated to the client.
>
> This patch
> - introduces 3 new state-id types for revoked open, lock, and
> delegation state. This requires the bitmask to be 'short',
> not 'char'
> - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> - introduces a per-client counter of these states and returns
> SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> Decrement this when freeing any admin-revoked state.
> - introduces stub code to find all interesting states for a given
> superblock so they can be revoked via the 'unlock_filesystem'
> file in /proc/fs/nfsd/
> No actual states are handled yet.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/nfsd/nfs4layouts.c | 2 +-
> fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> fs/nfsd/nfsctl.c | 1 +
> fs/nfsd/nfsd.h | 1 +
> fs/nfsd/state.h | 35 +++++++++++-----
> fs/nfsd/trace.h | 8 +++-
> 6 files changed, 120 insertions(+), 20 deletions(-)

....

> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index f96eaa8e9413..3af5ab55c978 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> */
> struct nfs4_stid {
> refcount_t sc_count;
> -#define NFS4_OPEN_STID 1
> -#define NFS4_LOCK_STID 2
> -#define NFS4_DELEG_STID 4
> + struct list_head sc_cp_list;
> + unsigned short sc_type;
> +#define NFS4_OPEN_STID BIT(0)
> +#define NFS4_LOCK_STID BIT(1)
> +#define NFS4_DELEG_STID BIT(2)
> /* For an open stateid kept around *only* to process close replays: */
> -#define NFS4_CLOSED_STID 8
> +#define NFS4_CLOSED_STID BIT(3)
> /* For a deleg stateid kept around only to process free_stateid's: */
> -#define NFS4_REVOKED_DELEG_STID 16
> -#define NFS4_CLOSED_DELEG_STID 32
> -#define NFS4_LAYOUT_STID 64
> - struct list_head sc_cp_list;
> - unsigned char sc_type;
> +#define NFS4_REVOKED_DELEG_STID BIT(4)
> +#define NFS4_CLOSED_DELEG_STID BIT(5)
> +#define NFS4_LAYOUT_STID BIT(6)
> +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> + NFS4_ADMIN_REVOKED_LOCK_STID | \
> + NFS4_ADMIN_REVOKED_DELEG_STID)
> stateid_t sc_stateid;
> spinlock_t sc_lock;
> struct nfs4_client *sc_client;
>
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index fbc0ccb40424..e359d531402c 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -648,6 +648,9 @@ TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_LOCK_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_DELEG_STID);

This is a bug that pre-dates your change in this patch...

Since the NFS4_ flags are C macros and not enum symbols,
TRACE_DEFINE_ENUM() is not necessary. All these can be
removed, rather than adding three new ones.

I can fix this up when I apply the series, or if you
happen to send a v3, you can fix it up first.


> #define show_stid_type(x) \
> __print_flags(x, "|", \
> @@ -657,7 +660,10 @@ TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> { NFS4_CLOSED_STID, "CLOSED" }, \
> { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> - { NFS4_LAYOUT_STID, "LAYOUT" })
> + { NFS4_LAYOUT_STID, "LAYOUT" }, \
> + { NFS4_ADMIN_REVOKED_STID, "ADMIN_REVOKED" }, \
> + { NFS4_ADMIN_REVOKED_LOCK_STID, "ADMIN_REVOKED_LOCK" }, \
> + { NFS4_ADMIN_REVOKED_DELEG_STID,"ADMIN_REVOKED_DELEG" })
>
> DECLARE_EVENT_CLASS(nfsd_stid_class,
> TP_PROTO(
> --
> 2.42.0
>

--
Chuck Lever


2023-11-01 02:51:27

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Wed, 01 Nov 2023, Chuck Lever III wrote:
>
> > On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
> >
> > The NFSv4 protocol allows state to be revoked by the admin and has error
> > codes which allow this to be communicated to the client.
> >
> > This patch
> > - introduces 3 new state-id types for revoked open, lock, and
> > delegation state. This requires the bitmask to be 'short',
> > not 'char'
> > - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> > - introduces a per-client counter of these states and returns
> > SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> > Decrement this when freeing any admin-revoked state.
> > - introduces stub code to find all interesting states for a given
> > superblock so they can be revoked via the 'unlock_filesystem'
> > file in /proc/fs/nfsd/
> > No actual states are handled yet.
> >
> > Signed-off-by: NeilBrown <[email protected]>
> > ---
> > fs/nfsd/nfs4layouts.c | 2 +-
> > fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> > fs/nfsd/nfsctl.c | 1 +
> > fs/nfsd/nfsd.h | 1 +
> > fs/nfsd/state.h | 35 +++++++++++-----
> > fs/nfsd/trace.h | 8 +++-
> > 6 files changed, 120 insertions(+), 20 deletions(-)
>
> ....
>
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index f96eaa8e9413..3af5ab55c978 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> > */
> > struct nfs4_stid {
> > refcount_t sc_count;
> > -#define NFS4_OPEN_STID 1
> > -#define NFS4_LOCK_STID 2
> > -#define NFS4_DELEG_STID 4
> > + struct list_head sc_cp_list;
> > + unsigned short sc_type;
> > +#define NFS4_OPEN_STID BIT(0)
> > +#define NFS4_LOCK_STID BIT(1)
> > +#define NFS4_DELEG_STID BIT(2)
> > /* For an open stateid kept around *only* to process close replays: */
> > -#define NFS4_CLOSED_STID 8
> > +#define NFS4_CLOSED_STID BIT(3)
> > /* For a deleg stateid kept around only to process free_stateid's: */
> > -#define NFS4_REVOKED_DELEG_STID 16
> > -#define NFS4_CLOSED_DELEG_STID 32
> > -#define NFS4_LAYOUT_STID 64
> > - struct list_head sc_cp_list;
> > - unsigned char sc_type;
> > +#define NFS4_REVOKED_DELEG_STID BIT(4)
> > +#define NFS4_CLOSED_DELEG_STID BIT(5)
> > +#define NFS4_LAYOUT_STID BIT(6)
> > +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> > +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> > +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> > +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> > + NFS4_ADMIN_REVOKED_LOCK_STID | \
> > + NFS4_ADMIN_REVOKED_DELEG_STID)
> > stateid_t sc_stateid;
> > spinlock_t sc_lock;
> > struct nfs4_client *sc_client;
> >
> > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> > index fbc0ccb40424..e359d531402c 100644
> > --- a/fs/nfsd/trace.h
> > +++ b/fs/nfsd/trace.h
> > @@ -648,6 +648,9 @@ TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> > TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> > TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> > TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_STID);
> > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_LOCK_STID);
> > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_DELEG_STID);
>
> This is a bug that pre-dates your change in this patch...
>
> Since the NFS4_ flags are C macros and not enum symbols,
> TRACE_DEFINE_ENUM() is not necessary. All these can be
> removed, rather than adding three new ones.
>
> I can fix this up when I apply the series, or if you
> happen to send a v3, you can fix it up first.

OK, thanks. I guess this use of "ENUM" for things that aren't enums
should have been a red flags :-)

NeilBrown

>
>
> > #define show_stid_type(x) \
> > __print_flags(x, "|", \
> > @@ -657,7 +660,10 @@ TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> > { NFS4_CLOSED_STID, "CLOSED" }, \
> > { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> > { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> > - { NFS4_LAYOUT_STID, "LAYOUT" })
> > + { NFS4_LAYOUT_STID, "LAYOUT" }, \
> > + { NFS4_ADMIN_REVOKED_STID, "ADMIN_REVOKED" }, \
> > + { NFS4_ADMIN_REVOKED_LOCK_STID, "ADMIN_REVOKED_LOCK" }, \
> > + { NFS4_ADMIN_REVOKED_DELEG_STID,"ADMIN_REVOKED_DELEG" })
> >
> > DECLARE_EVENT_CLASS(nfsd_stid_class,
> > TP_PROTO(
> > --
> > 2.42.0
> >
>
> --
> Chuck Lever
>
>
>

2023-11-01 03:01:58

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH 6/6] nfsd: allow delegation state ids to be revoked and then freed

On Wed, 01 Nov 2023, Chuck Lever III wrote:
> Howdy Neil-
>
> > On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
> >
> > Revoking state through 'unlock_filesystem' now revokes any delegation
> > states found. When the stateids are then freed by the client, the
> > revoked stateids will be cleaned up correctly.
>
> Here's my derpy question of the day.
>
> "When the stateids are then freed by the client" seems to be
> a repeating trope, and it concerns me a bit (probably because
> I haven't yet learned how this mechanism /currently/ works)...
>
> In the case when the client has actually vanished (eg, was
> destroyed by an orchestrator), it's not going to be around
> to actively free revoked state. Doesn't that situation result
> in pinned state on the server? I would expect that's a primary
> use case for "unlock_filesystem."

If a client is de-orchestrated then it will stop renewing its lease, and
regular cleanup of expired state will kick in after one lease period.
So for NFSv4 we don't need to worry about disappearing clients.
For NFSv3 (or more specifically for NLM) we did and locks could hang
around indefinitely if the client died.
For that reason we have /proc/fs/nfsd/unlock_ip which discards all NFSv3
lock state for a given client. Extending that to NFSv4 is not needed
because of leases, and not meaningful because of trunking - a client
might have several IP's.

unlock_filesystem is for when the client is still active and we want to
let it (them) continue accessing some filesystems, but not all.

NeilBrown


>
> Maybe I've misunderstood something fundamental.
>
>
> --
> Chuck Lever
>
>
>

2023-11-01 05:42:36

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 6/6] nfsd: allow delegation state ids to be revoked and then freed



> On Oct 31, 2023, at 8:01 PM, NeilBrown <[email protected]> wrote:
>
> On Wed, 01 Nov 2023, Chuck Lever III wrote:
>> Howdy Neil-
>>
>>> On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
>>>
>>> Revoking state through 'unlock_filesystem' now revokes any delegation
>>> states found. When the stateids are then freed by the client, the
>>> revoked stateids will be cleaned up correctly.
>>
>> Here's my derpy question of the day.
>>
>> "When the stateids are then freed by the client" seems to be
>> a repeating trope, and it concerns me a bit (probably because
>> I haven't yet learned how this mechanism /currently/ works)...
>>
>> In the case when the client has actually vanished (eg, was
>> destroyed by an orchestrator), it's not going to be around
>> to actively free revoked state. Doesn't that situation result
>> in pinned state on the server? I would expect that's a primary
>> use case for "unlock_filesystem."
>
> If a client is de-orchestrated then it will stop renewing its lease, and
> regular cleanup of expired state will kick in after one lease period.

Thanks for educating me.

Such state actually stays around for much longer now as
expired but renewable state. Does unlock_filesystem need
to purge courtesy state too, to make the target filesystem
unexportable and unmountable?


> So for NFSv4 we don't need to worry about disappearing clients.
> For NFSv3 (or more specifically for NLM) we did and locks could hang
> around indefinitely if the client died.
> For that reason we have /proc/fs/nfsd/unlock_ip which discards all NFSv3
> lock state for a given client. Extending that to NFSv4 is not needed
> because of leases, and not meaningful because of trunking - a client
> might have several IP's.
>
> unlock_filesystem is for when the client is still active and we want to
> let it (them) continue accessing some filesystems, but not all.
>
> NeilBrown
>
>
>>
>> Maybe I've misunderstood something fundamental.
>>
>>
>> --
>> Chuck Lever


--
Chuck Lever


2023-11-01 07:43:37

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH 6/6] nfsd: allow delegation state ids to be revoked and then freed

On Wed, 01 Nov 2023, Chuck Lever III wrote:
>
> > On Oct 31, 2023, at 8:01 PM, NeilBrown <[email protected]> wrote:
> >
> > On Wed, 01 Nov 2023, Chuck Lever III wrote:
> >> Howdy Neil-
> >>
> >>> On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
> >>>
> >>> Revoking state through 'unlock_filesystem' now revokes any delegation
> >>> states found. When the stateids are then freed by the client, the
> >>> revoked stateids will be cleaned up correctly.
> >>
> >> Here's my derpy question of the day.
> >>
> >> "When the stateids are then freed by the client" seems to be
> >> a repeating trope, and it concerns me a bit (probably because
> >> I haven't yet learned how this mechanism /currently/ works)...
> >>
> >> In the case when the client has actually vanished (eg, was
> >> destroyed by an orchestrator), it's not going to be around
> >> to actively free revoked state. Doesn't that situation result
> >> in pinned state on the server? I would expect that's a primary
> >> use case for "unlock_filesystem."
> >
> > If a client is de-orchestrated then it will stop renewing its lease, and
> > regular cleanup of expired state will kick in after one lease period.
>
> Thanks for educating me.
>
> Such state actually stays around for much longer now as
> expired but renewable state. Does unlock_filesystem need
> to purge courtesy state too, to make the target filesystem
> unexportable and unmountable?

I don't think there is any special case there that we need to deal with.
I haven't explored in detail but I think "courtesy" state is managed at
the client level. Some clients still have valid leases, others are
being maintained only as a courtesy. At the individual state level
there is no difference. The "unlock_filesystem" code examines all
states for all client and selects those for the target filesystem and
revokes those.

NeilBrown


>
>
> > So for NFSv4 we don't need to worry about disappearing clients.
> > For NFSv3 (or more specifically for NLM) we did and locks could hang
> > around indefinitely if the client died.
> > For that reason we have /proc/fs/nfsd/unlock_ip which discards all NFSv3
> > lock state for a given client. Extending that to NFSv4 is not needed
> > because of leases, and not meaningful because of trunking - a client
> > might have several IP's.
> >
> > unlock_filesystem is for when the client is still active and we want to
> > let it (them) continue accessing some filesystems, but not all.
> >
> > NeilBrown
> >
> >
> >>
> >> Maybe I've misunderstood something fundamental.
> >>
> >>
> >> --
> >> Chuck Lever
>
>
> --
> Chuck Lever
>
>
>

2023-11-01 15:42:21

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 6/6] nfsd: allow delegation state ids to be revoked and then freed



> On Nov 1, 2023, at 12:43 AM, NeilBrown <[email protected]> wrote:
>
> On Wed, 01 Nov 2023, Chuck Lever III wrote:
>>
>>> On Oct 31, 2023, at 8:01 PM, NeilBrown <[email protected]> wrote:
>>>
>>> On Wed, 01 Nov 2023, Chuck Lever III wrote:
>>>> Howdy Neil-
>>>>
>>>>> On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
>>>>>
>>>>> Revoking state through 'unlock_filesystem' now revokes any delegation
>>>>> states found. When the stateids are then freed by the client, the
>>>>> revoked stateids will be cleaned up correctly.
>>>>
>>>> Here's my derpy question of the day.
>>>>
>>>> "When the stateids are then freed by the client" seems to be
>>>> a repeating trope, and it concerns me a bit (probably because
>>>> I haven't yet learned how this mechanism /currently/ works)...
>>>>
>>>> In the case when the client has actually vanished (eg, was
>>>> destroyed by an orchestrator), it's not going to be around
>>>> to actively free revoked state. Doesn't that situation result
>>>> in pinned state on the server? I would expect that's a primary
>>>> use case for "unlock_filesystem."
>>>
>>> If a client is de-orchestrated then it will stop renewing its lease, and
>>> regular cleanup of expired state will kick in after one lease period.
>>
>> Thanks for educating me.
>>
>> Such state actually stays around for much longer now as
>> expired but renewable state. Does unlock_filesystem need
>> to purge courtesy state too, to make the target filesystem
>> unexportable and unmountable?
>
> I don't think there is any special case there that we need to deal with.
> I haven't explored in detail but I think "courtesy" state is managed at
> the client level. Some clients still have valid leases, others are
> being maintained only as a courtesy. At the individual state level
> there is no difference. The "unlock_filesystem" code examines all
> states for all client and selects those for the target filesystem and
> revokes those.

Dai can correct me if I've misremembered, but NFSD's courteous
server does not currently implement partial loss of state. If
any of a client's state is lost while it is disconnected, the
server discards its entire lease.

Thus if an admin unlocks a filesystem that a disconnected client
has some open files on, that client's entire lease should be
purged.


>>> So for NFSv4 we don't need to worry about disappearing clients.
>>> For NFSv3 (or more specifically for NLM) we did and locks could hang
>>> around indefinitely if the client died.
>>> For that reason we have /proc/fs/nfsd/unlock_ip which discards all NFSv3
>>> lock state for a given client. Extending that to NFSv4 is not needed
>>> because of leases, and not meaningful because of trunking - a client
>>> might have several IP's.
>>>
>>> unlock_filesystem is for when the client is still active and we want to
>>> let it (them) continue accessing some filesystems, but not all.
>>>
>>> NeilBrown
>>>
>>>
>>>>
>>>> Maybe I've misunderstood something fundamental.
>>>>
>>>>
>>>> --
>>>> Chuck Lever
>>
>>
>> --
>> Chuck Lever


--
Chuck Lever


2023-11-01 17:41:27

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH 6/6] nfsd: allow delegation state ids to be revoked and then freed


On 11/1/23 8:41 AM, Chuck Lever III wrote:
>
>> On Nov 1, 2023, at 12:43 AM, NeilBrown <[email protected]> wrote:
>>
>> On Wed, 01 Nov 2023, Chuck Lever III wrote:
>>>> On Oct 31, 2023, at 8:01 PM, NeilBrown <[email protected]> wrote:
>>>>
>>>> On Wed, 01 Nov 2023, Chuck Lever III wrote:
>>>>> Howdy Neil-
>>>>>
>>>>>> On Oct 31, 2023, at 5:57 PM, NeilBrown <[email protected]> wrote:
>>>>>>
>>>>>> Revoking state through 'unlock_filesystem' now revokes any delegation
>>>>>> states found. When the stateids are then freed by the client, the
>>>>>> revoked stateids will be cleaned up correctly.
>>>>> Here's my derpy question of the day.
>>>>>
>>>>> "When the stateids are then freed by the client" seems to be
>>>>> a repeating trope, and it concerns me a bit (probably because
>>>>> I haven't yet learned how this mechanism /currently/ works)...
>>>>>
>>>>> In the case when the client has actually vanished (eg, was
>>>>> destroyed by an orchestrator), it's not going to be around
>>>>> to actively free revoked state. Doesn't that situation result
>>>>> in pinned state on the server? I would expect that's a primary
>>>>> use case for "unlock_filesystem."
>>>> If a client is de-orchestrated then it will stop renewing its lease, and
>>>> regular cleanup of expired state will kick in after one lease period.
>>> Thanks for educating me.
>>>
>>> Such state actually stays around for much longer now as
>>> expired but renewable state. Does unlock_filesystem need
>>> to purge courtesy state too, to make the target filesystem
>>> unexportable and unmountable?
>> I don't think there is any special case there that we need to deal with.
>> I haven't explored in detail but I think "courtesy" state is managed at
>> the client level. Some clients still have valid leases, others are
>> being maintained only as a courtesy. At the individual state level
>> there is no difference. The "unlock_filesystem" code examines all
>> states for all client and selects those for the target filesystem and
>> revokes those.
> Dai can correct me if I've misremembered, but NFSD's courteous
> server does not currently implement partial loss of state. If
> any of a client's state is lost while it is disconnected, the
> server discards its entire lease.
>
> Thus if an admin unlocks a filesystem that a disconnected client
> has some open files on, that client's entire lease should be
> purged.

Ben is correct, courtesy state is managed at the client level. The
server does not deal with individual state of the courtesy client.
The courtesy clients and their states are allowed to be around until
the memory shrinker kicks in and the worker starts reaping clients in
NFSD4_COURTESY state.

-Dai

>
>
>>>> So for NFSv4 we don't need to worry about disappearing clients.
>>>> For NFSv3 (or more specifically for NLM) we did and locks could hang
>>>> around indefinitely if the client died.
>>>> For that reason we have /proc/fs/nfsd/unlock_ip which discards all NFSv3
>>>> lock state for a given client. Extending that to NFSv4 is not needed
>>>> because of leases, and not meaningful because of trunking - a client
>>>> might have several IP's.
>>>>
>>>> unlock_filesystem is for when the client is still active and we want to
>>>> let it (them) continue accessing some filesystems, but not all.
>>>>
>>>> NeilBrown
>>>>
>>>>
>>>>> Maybe I've misunderstood something fundamental.
>>>>>
>>>>>
>>>>> --
>>>>> Chuck Lever
>>>
>>> --
>>> Chuck Lever
>
> --
> Chuck Lever
>
>

2023-11-02 10:46:37

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> The NFSv4 protocol allows state to be revoked by the admin and has error
> codes which allow this to be communicated to the client.
>
> This patch
> - introduces 3 new state-id types for revoked open, lock, and
> delegation state. This requires the bitmask to be 'short',
> not 'char'
> - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> - introduces a per-client counter of these states and returns
> SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> Decrement this when freeing any admin-revoked state.
> - introduces stub code to find all interesting states for a given
> superblock so they can be revoked via the 'unlock_filesystem'
> file in /proc/fs/nfsd/
> No actual states are handled yet.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/nfsd/nfs4layouts.c | 2 +-
> fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> fs/nfsd/nfsctl.c | 1 +
> fs/nfsd/nfsd.h | 1 +
> fs/nfsd/state.h | 35 +++++++++++-----
> fs/nfsd/trace.h | 8 +++-
> 6 files changed, 120 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index 5e8096bc5eaa..09d0363bfbc4 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> {
> struct nfs4_layout_stateid *ls;
> struct nfs4_stid *stid;
> - unsigned char typemask = NFS4_LAYOUT_STID;
> + unsigned short typemask = NFS4_LAYOUT_STID;
> __be32 status;
>
> if (create)
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 65fd5510323a..f3ba53a16105 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> return NULL;
> }
>
> +void nfs4_unhash_stid(struct nfs4_stid *s)
> +{
> + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
> + atomic_dec(&s->sc_client->cl_admin_revoked);
> + s->sc_type = 0;
> +}
> +
> void
> nfs4_put_stid(struct nfs4_stid *s)
> {
> @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
> return;
> }
> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> + nfs4_unhash_stid(s);
> nfs4_free_cpntf_statelist(clp->net, s);
> spin_unlock(&clp->cl_lock);
> s->sc_free(s);
> @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> nfs4_put_stid(&dp->dl_stid);
> }
>
> -void nfs4_unhash_stid(struct nfs4_stid *s)
> -{
> - s->sc_type = 0;
> -}
> -
> /**
> * nfs4_delegation_exists - Discover if this delegation already exists
> * @clp: a pointer to the nfs4_client we're granting a delegation to
> @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> }
>
> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> + nfs4_unhash_stid(s);
> list_add(&stp->st_locks, reaplist);
> }
>
> @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
> nfs4_put_stateowner(&oo->oo_owner);
> }
>
> +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> + struct super_block *sb,
> + unsigned short sc_types)
> +{
> + unsigned long id, tmp;
> + struct nfs4_stid *stid;
> +
> + spin_lock(&clp->cl_lock);
> + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> + if ((stid->sc_type & sc_types) &&
> + stid->sc_file->fi_inode->i_sb == sb) {
> + refcount_inc(&stid->sc_count);
> + break;
> + }
> + spin_unlock(&clp->cl_lock);
> + return stid;
> +}
> +
> +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> +{
> + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> + unsigned int idhashval;
> + unsigned short sc_types;
> +
> + sc_types = 0;
> +
> + spin_lock(&nn->client_lock);
> + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> + struct nfs4_client *clp;
> + retry:
> + list_for_each_entry(clp, head, cl_idhash) {
> + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> + sc_types);
> + if (stid) {
> + spin_unlock(&nn->client_lock);
> + switch (stid->sc_type) {
> + }
> + nfs4_put_stid(stid);
> + spin_lock(&nn->client_lock);
> + goto retry;
> + }
> + }
> + }
> + spin_unlock(&nn->client_lock);
> +}
> +
> static inline int
> hash_sessionid(struct nfs4_sessionid *sessionid)
> {
> @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> }
>
> static struct nfs4_stid *
> -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> + unsigned short typemask)
> {
> struct nfs4_stid *s;
>
> @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
> }
> seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> + seq_printf(m, "admin-revoked states: %d\n",
> + atomic_read(&clp->cl_admin_revoked));
> drop_client(clp);
>
> return 0;
> @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> }
> if (!list_empty(&clp->cl_revoked))
> seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> + if (atomic_read(&clp->cl_admin_revoked))
> + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> out_no_session:
> if (conn)
> free_conn(conn);
> @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> status = nfserr_deleg_revoked;
> goto out;
> }
> + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
> + nfs4_put_stid(&deleg->dl_stid);
> + status = nfserr_admin_revoked;
> + goto out;
> + }
> flags = share_access_to_flags(open->op_share_access);
> status = nfs4_check_delegmode(deleg, flags);
> if (status) {
> @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> case NFS4_REVOKED_DELEG_STID:
> status = nfserr_deleg_revoked;
> break;
> + case NFS4_ADMIN_REVOKED_STID:
> + case NFS4_ADMIN_REVOKED_LOCK_STID:
> + case NFS4_ADMIN_REVOKED_DELEG_STID:
> + status = nfserr_admin_revoked;
> + break;
> case NFS4_OPEN_STID:
> case NFS4_LOCK_STID:
> status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>
> __be32
> nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> + stateid_t *stateid, unsigned short typemask,
> struct nfs4_stid **s, struct nfsd_net *nn)
> {
> __be32 status;
> @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> else if (typemask & NFS4_DELEG_STID)
> typemask |= NFS4_REVOKED_DELEG_STID;
>
> + if (typemask & NFS4_OPEN_STID)
> + typemask |= NFS4_ADMIN_REVOKED_STID;
> + if (typemask & NFS4_LOCK_STID)
> + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
> + if (typemask & NFS4_DELEG_STID)
> + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
> +
> if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> CLOSE_STATEID(stateid))
> return nfserr_bad_stateid;
> @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> return nfserr_deleg_revoked;
> return nfserr_bad_stateid;
> }
> + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
> + nfs4_put_stid(stid);
> + return nfserr_admin_revoked;
> + }
> *s = stid;
> return nfs_ok;
> }
> @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> */
> static __be32
> nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> - stateid_t *stateid, char typemask,
> + stateid_t *stateid, unsigned short typemask,
> struct nfs4_ol_stateid **stpp,
> struct nfsd_net *nn)
> {
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 739ed5bf71cd..50368eec86b0 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> * 3. Is that directory the root of an exported file system?
> */
> error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
>
> path_put(&path);
> return error;
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index f5ff42f41ee7..d46203eac3c8 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
> #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
> #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
> #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
> +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
> #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
> #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index f96eaa8e9413..3af5ab55c978 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> */
> struct nfs4_stid {
> refcount_t sc_count;
> -#define NFS4_OPEN_STID 1
> -#define NFS4_LOCK_STID 2
> -#define NFS4_DELEG_STID 4
> + struct list_head sc_cp_list;
> + unsigned short sc_type;

Should we just go ahead and make this a full 32-bit word? We seem to
keep adding flags to this field, and I doubt we're saving anything by
making this a short.

> +#define NFS4_OPEN_STID BIT(0)
> +#define NFS4_LOCK_STID BIT(1)
> +#define NFS4_DELEG_STID BIT(2)
> /* For an open stateid kept around *only* to process close replays: */
> -#define NFS4_CLOSED_STID 8
> +#define NFS4_CLOSED_STID BIT(3)
> /* For a deleg stateid kept around only to process free_stateid's: */
> -#define NFS4_REVOKED_DELEG_STID 16
> -#define NFS4_CLOSED_DELEG_STID 32
> -#define NFS4_LAYOUT_STID 64
> - struct list_head sc_cp_list;
> - unsigned char sc_type;
> +#define NFS4_REVOKED_DELEG_STID BIT(4)
> +#define NFS4_CLOSED_DELEG_STID BIT(5)
> +#define NFS4_LAYOUT_STID BIT(6)
> +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> + NFS4_ADMIN_REVOKED_LOCK_STID | \
> + NFS4_ADMIN_REVOKED_DELEG_STID)

Not a specific criticism of these patches, since this problem preexists
them, but I really dislike the way that sc_type is used as a bitmask,
but also sort of like an enum. In some cases, we test for specific flags
in the mask, and in other cases (e.g. states_show), we treat them as
discrete values to feed it to a switch().

Personally, I'd find this less confusing if we just treat this as a set
of flags full-stop. We could leave the low-order bits to show the real
type (open, lock, deleg, etc.) and just mask off the high-order bits
when we need to feed it to a switch statement.

For instance above, we're adding 3 new NFS4_ADMIN_REVOKED values, but we
could (in theory) just have a flag in there that says NFS4_ADMIN_REVOKED
and leave the old type bit in place instead of changing it to a new
discrete sc_type value.

> stateid_t sc_stateid;
> spinlock_t sc_lock;
> struct nfs4_client *sc_client;
> @@ -372,6 +378,7 @@ struct nfs4_client {
> clientid_t cl_clientid; /* generated by server */
> nfs4_verifier cl_confirm; /* generated by server */
> u32 cl_minorversion;
> + atomic_t cl_admin_revoked; /* count of admin-revoked states */
> /* NFSv4.1 client implementation id: */
> struct xdr_netobj cl_nii_domain;
> struct xdr_netobj cl_nii_name;
> @@ -694,7 +701,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> stateid_t *stateid, int flags, struct nfsd_file **filp,
> struct nfs4_stid **cstid);
> __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> + stateid_t *stateid, unsigned short typemask,
> struct nfs4_stid **s, struct nfsd_net *nn);
> struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
> void (*sc_free)(struct nfs4_stid *));
> @@ -736,6 +743,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
> }
> struct nfsd_file *find_any_file(struct nfs4_file *f);
>
> +#ifdef CONFIG_NFSD_V4
> +void nfsd4_revoke_states(struct net *net, struct super_block *sb);
> +#else
> +static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> +{
> +}
> +#endif
> +
> /* grace period management */
> void nfsd4_end_grace(struct nfsd_net *nn);
>
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index fbc0ccb40424..e359d531402c 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -648,6 +648,9 @@ TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_LOCK_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_DELEG_STID);
>
> #define show_stid_type(x) \
> __print_flags(x, "|", \
> @@ -657,7 +660,10 @@ TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> { NFS4_CLOSED_STID, "CLOSED" }, \
> { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> - { NFS4_LAYOUT_STID, "LAYOUT" })
> + { NFS4_LAYOUT_STID, "LAYOUT" }, \
> + { NFS4_ADMIN_REVOKED_STID, "ADMIN_REVOKED" }, \
> + { NFS4_ADMIN_REVOKED_LOCK_STID, "ADMIN_REVOKED_LOCK" }, \
> + { NFS4_ADMIN_REVOKED_DELEG_STID,"ADMIN_REVOKED_DELEG" })
>
> DECLARE_EVENT_CLASS(nfsd_stid_class,
> TP_PROTO(

--
Jeff Layton <[email protected]>

2023-11-02 11:26:20

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> The NFSv4 protocol allows state to be revoked by the admin and has error
> codes which allow this to be communicated to the client.
>
> This patch
> - introduces 3 new state-id types for revoked open, lock, and
> delegation state. This requires the bitmask to be 'short',
> not 'char'
> - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> - introduces a per-client counter of these states and returns
> SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> Decrement this when freeing any admin-revoked state.
> - introduces stub code to find all interesting states for a given
> superblock so they can be revoked via the 'unlock_filesystem'
> file in /proc/fs/nfsd/
> No actual states are handled yet.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/nfsd/nfs4layouts.c | 2 +-
> fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> fs/nfsd/nfsctl.c | 1 +
> fs/nfsd/nfsd.h | 1 +
> fs/nfsd/state.h | 35 +++++++++++-----
> fs/nfsd/trace.h | 8 +++-
> 6 files changed, 120 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index 5e8096bc5eaa..09d0363bfbc4 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> {
> struct nfs4_layout_stateid *ls;
> struct nfs4_stid *stid;
> - unsigned char typemask = NFS4_LAYOUT_STID;
> + unsigned short typemask = NFS4_LAYOUT_STID;
> __be32 status;
>
> if (create)
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 65fd5510323a..f3ba53a16105 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> return NULL;
> }
>
> +void nfs4_unhash_stid(struct nfs4_stid *s)
> +{
> + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
> + atomic_dec(&s->sc_client->cl_admin_revoked);
> + s->sc_type = 0;
> +}
> +

Another thing that would be helpful to clarify here is the locking (if
any) around sc_type. Currently, it's just a bare non-atomic variable,
but we do change it occasionally and it has never been clear whether and
how it should be protected.

Any thoughts?

> void
> nfs4_put_stid(struct nfs4_stid *s)
> {
> @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
> return;
> }
> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> + nfs4_unhash_stid(s);
> nfs4_free_cpntf_statelist(clp->net, s);
> spin_unlock(&clp->cl_lock);
> s->sc_free(s);
> @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> nfs4_put_stid(&dp->dl_stid);
> }
>
> -void nfs4_unhash_stid(struct nfs4_stid *s)
> -{
> - s->sc_type = 0;
> -}
> -
> /**
> * nfs4_delegation_exists - Discover if this delegation already exists
> * @clp: a pointer to the nfs4_client we're granting a delegation to
> @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> }
>
> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> + nfs4_unhash_stid(s);
> list_add(&stp->st_locks, reaplist);
> }
>
> @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
> nfs4_put_stateowner(&oo->oo_owner);
> }
>
> +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> + struct super_block *sb,
> + unsigned short sc_types)
> +{
> + unsigned long id, tmp;
> + struct nfs4_stid *stid;
> +
> + spin_lock(&clp->cl_lock);
> + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> + if ((stid->sc_type & sc_types) &&
> + stid->sc_file->fi_inode->i_sb == sb) {
> + refcount_inc(&stid->sc_count);
> + break;
> + }
> + spin_unlock(&clp->cl_lock);
> + return stid;
> +}
> +
> +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> +{
> + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> + unsigned int idhashval;
> + unsigned short sc_types;
> +
> + sc_types = 0;
> +
> + spin_lock(&nn->client_lock);
> + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> + struct nfs4_client *clp;
> + retry:
> + list_for_each_entry(clp, head, cl_idhash) {
> + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> + sc_types);
> + if (stid) {
> + spin_unlock(&nn->client_lock);
> + switch (stid->sc_type) {
> + }
> + nfs4_put_stid(stid);
> + spin_lock(&nn->client_lock);
> + goto retry;
> + }
> + }
> + }
> + spin_unlock(&nn->client_lock);
> +}
> +
> static inline int
> hash_sessionid(struct nfs4_sessionid *sessionid)
> {
> @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> }
>
> static struct nfs4_stid *
> -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> + unsigned short typemask)
> {
> struct nfs4_stid *s;
>
> @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
> }
> seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> + seq_printf(m, "admin-revoked states: %d\n",
> + atomic_read(&clp->cl_admin_revoked));
> drop_client(clp);
>
> return 0;
> @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> }
> if (!list_empty(&clp->cl_revoked))
> seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> + if (atomic_read(&clp->cl_admin_revoked))
> + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> out_no_session:
> if (conn)
> free_conn(conn);
> @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> status = nfserr_deleg_revoked;
> goto out;
> }
> + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
> + nfs4_put_stid(&deleg->dl_stid);
> + status = nfserr_admin_revoked;
> + goto out;
> + }
> flags = share_access_to_flags(open->op_share_access);
> status = nfs4_check_delegmode(deleg, flags);
> if (status) {
> @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> case NFS4_REVOKED_DELEG_STID:
> status = nfserr_deleg_revoked;
> break;
> + case NFS4_ADMIN_REVOKED_STID:
> + case NFS4_ADMIN_REVOKED_LOCK_STID:
> + case NFS4_ADMIN_REVOKED_DELEG_STID:
> + status = nfserr_admin_revoked;
> + break;
> case NFS4_OPEN_STID:
> case NFS4_LOCK_STID:
> status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>
> __be32
> nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> + stateid_t *stateid, unsigned short typemask,
> struct nfs4_stid **s, struct nfsd_net *nn)
> {
> __be32 status;
> @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> else if (typemask & NFS4_DELEG_STID)
> typemask |= NFS4_REVOKED_DELEG_STID;
>
> + if (typemask & NFS4_OPEN_STID)
> + typemask |= NFS4_ADMIN_REVOKED_STID;
> + if (typemask & NFS4_LOCK_STID)
> + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
> + if (typemask & NFS4_DELEG_STID)
> + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
> +
> if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> CLOSE_STATEID(stateid))
> return nfserr_bad_stateid;
> @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> return nfserr_deleg_revoked;
> return nfserr_bad_stateid;
> }
> + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
> + nfs4_put_stid(stid);
> + return nfserr_admin_revoked;
> + }
> *s = stid;
> return nfs_ok;
> }
> @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> */
> static __be32
> nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> - stateid_t *stateid, char typemask,
> + stateid_t *stateid, unsigned short typemask,
> struct nfs4_ol_stateid **stpp,
> struct nfsd_net *nn)
> {
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 739ed5bf71cd..50368eec86b0 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> * 3. Is that directory the root of an exported file system?
> */
> error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
>
> path_put(&path);
> return error;
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index f5ff42f41ee7..d46203eac3c8 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
> #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
> #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
> #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
> +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
> #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
> #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index f96eaa8e9413..3af5ab55c978 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> */
> struct nfs4_stid {
> refcount_t sc_count;
> -#define NFS4_OPEN_STID 1
> -#define NFS4_LOCK_STID 2
> -#define NFS4_DELEG_STID 4
> + struct list_head sc_cp_list;
> + unsigned short sc_type;
> +#define NFS4_OPEN_STID BIT(0)
> +#define NFS4_LOCK_STID BIT(1)
> +#define NFS4_DELEG_STID BIT(2)
> /* For an open stateid kept around *only* to process close replays: */
> -#define NFS4_CLOSED_STID 8
> +#define NFS4_CLOSED_STID BIT(3)
> /* For a deleg stateid kept around only to process free_stateid's: */
> -#define NFS4_REVOKED_DELEG_STID 16
> -#define NFS4_CLOSED_DELEG_STID 32
> -#define NFS4_LAYOUT_STID 64
> - struct list_head sc_cp_list;
> - unsigned char sc_type;
> +#define NFS4_REVOKED_DELEG_STID BIT(4)
> +#define NFS4_CLOSED_DELEG_STID BIT(5)
> +#define NFS4_LAYOUT_STID BIT(6)
> +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> + NFS4_ADMIN_REVOKED_LOCK_STID | \
> + NFS4_ADMIN_REVOKED_DELEG_STID)
> stateid_t sc_stateid;
> spinlock_t sc_lock;
> struct nfs4_client *sc_client;
> @@ -372,6 +378,7 @@ struct nfs4_client {
> clientid_t cl_clientid; /* generated by server */
> nfs4_verifier cl_confirm; /* generated by server */
> u32 cl_minorversion;
> + atomic_t cl_admin_revoked; /* count of admin-revoked states */
> /* NFSv4.1 client implementation id: */
> struct xdr_netobj cl_nii_domain;
> struct xdr_netobj cl_nii_name;
> @@ -694,7 +701,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> stateid_t *stateid, int flags, struct nfsd_file **filp,
> struct nfs4_stid **cstid);
> __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> + stateid_t *stateid, unsigned short typemask,
> struct nfs4_stid **s, struct nfsd_net *nn);
> struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
> void (*sc_free)(struct nfs4_stid *));
> @@ -736,6 +743,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
> }
> struct nfsd_file *find_any_file(struct nfs4_file *f);
>
> +#ifdef CONFIG_NFSD_V4
> +void nfsd4_revoke_states(struct net *net, struct super_block *sb);
> +#else
> +static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> +{
> +}
> +#endif
> +
> /* grace period management */
> void nfsd4_end_grace(struct nfsd_net *nn);
>
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index fbc0ccb40424..e359d531402c 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -648,6 +648,9 @@ TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_LOCK_STID);
> +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_DELEG_STID);
>
> #define show_stid_type(x) \
> __print_flags(x, "|", \
> @@ -657,7 +660,10 @@ TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> { NFS4_CLOSED_STID, "CLOSED" }, \
> { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> - { NFS4_LAYOUT_STID, "LAYOUT" })
> + { NFS4_LAYOUT_STID, "LAYOUT" }, \
> + { NFS4_ADMIN_REVOKED_STID, "ADMIN_REVOKED" }, \
> + { NFS4_ADMIN_REVOKED_LOCK_STID, "ADMIN_REVOKED_LOCK" }, \
> + { NFS4_ADMIN_REVOKED_DELEG_STID,"ADMIN_REVOKED_DELEG" })
>
> DECLARE_EVENT_CLASS(nfsd_stid_class,
> TP_PROTO(

--
Jeff Layton <[email protected]>

2023-11-02 11:29:51

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH 0/6 v2] support admin-revocation of v4 state

On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> This set fixes two issues found by kernel test robot
> 1/ nfsd4_revoke_states() needs to be defined when not CONFIG_NFSD_V4
> 2/ nfs40_last_revoke is larger than a mechine word in 32 bit configs,
> so needs protection - use nn->client_lock which is already taken when
> nfs40_last_revoke is accessed.
>
> NeilBrown
>

In general, the series looks like something reasonable to do (and
something admins would welcome). I have some (long standing) concerns
around the sc_type value though that I think we probably need to address
prior to or in conjunction with this set.

Cheers,
--
Jeff Layton <[email protected]>

2023-11-02 20:03:36

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Thu, 02 Nov 2023, Jeff Layton wrote:
> On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> > The NFSv4 protocol allows state to be revoked by the admin and has error
> > codes which allow this to be communicated to the client.
> >
> > This patch
> > - introduces 3 new state-id types for revoked open, lock, and
> > delegation state. This requires the bitmask to be 'short',
> > not 'char'
> > - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> > - introduces a per-client counter of these states and returns
> > SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> > Decrement this when freeing any admin-revoked state.
> > - introduces stub code to find all interesting states for a given
> > superblock so they can be revoked via the 'unlock_filesystem'
> > file in /proc/fs/nfsd/
> > No actual states are handled yet.
> >
> > Signed-off-by: NeilBrown <[email protected]>
> > ---
> > fs/nfsd/nfs4layouts.c | 2 +-
> > fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> > fs/nfsd/nfsctl.c | 1 +
> > fs/nfsd/nfsd.h | 1 +
> > fs/nfsd/state.h | 35 +++++++++++-----
> > fs/nfsd/trace.h | 8 +++-
> > 6 files changed, 120 insertions(+), 20 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > index 5e8096bc5eaa..09d0363bfbc4 100644
> > --- a/fs/nfsd/nfs4layouts.c
> > +++ b/fs/nfsd/nfs4layouts.c
> > @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> > {
> > struct nfs4_layout_stateid *ls;
> > struct nfs4_stid *stid;
> > - unsigned char typemask = NFS4_LAYOUT_STID;
> > + unsigned short typemask = NFS4_LAYOUT_STID;
> > __be32 status;
> >
> > if (create)
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 65fd5510323a..f3ba53a16105 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > return NULL;
> > }
> >
> > +void nfs4_unhash_stid(struct nfs4_stid *s)
> > +{
> > + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
> > + atomic_dec(&s->sc_client->cl_admin_revoked);
> > + s->sc_type = 0;
> > +}
> > +
> > void
> > nfs4_put_stid(struct nfs4_stid *s)
> > {
> > @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
> > return;
> > }
> > idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > + nfs4_unhash_stid(s);
> > nfs4_free_cpntf_statelist(clp->net, s);
> > spin_unlock(&clp->cl_lock);
> > s->sc_free(s);
> > @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> > nfs4_put_stid(&dp->dl_stid);
> > }
> >
> > -void nfs4_unhash_stid(struct nfs4_stid *s)
> > -{
> > - s->sc_type = 0;
> > -}
> > -
> > /**
> > * nfs4_delegation_exists - Discover if this delegation already exists
> > * @clp: a pointer to the nfs4_client we're granting a delegation to
> > @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> > }
> >
> > idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > + nfs4_unhash_stid(s);
> > list_add(&stp->st_locks, reaplist);
> > }
> >
> > @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
> > nfs4_put_stateowner(&oo->oo_owner);
> > }
> >
> > +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> > + struct super_block *sb,
> > + unsigned short sc_types)
> > +{
> > + unsigned long id, tmp;
> > + struct nfs4_stid *stid;
> > +
> > + spin_lock(&clp->cl_lock);
> > + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> > + if ((stid->sc_type & sc_types) &&
> > + stid->sc_file->fi_inode->i_sb == sb) {
> > + refcount_inc(&stid->sc_count);
> > + break;
> > + }
> > + spin_unlock(&clp->cl_lock);
> > + return stid;
> > +}
> > +
> > +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > +{
> > + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > + unsigned int idhashval;
> > + unsigned short sc_types;
> > +
> > + sc_types = 0;
> > +
> > + spin_lock(&nn->client_lock);
> > + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> > + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> > + struct nfs4_client *clp;
> > + retry:
> > + list_for_each_entry(clp, head, cl_idhash) {
> > + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> > + sc_types);
> > + if (stid) {
> > + spin_unlock(&nn->client_lock);
> > + switch (stid->sc_type) {
> > + }
> > + nfs4_put_stid(stid);
> > + spin_lock(&nn->client_lock);
> > + goto retry;
> > + }
> > + }
> > + }
> > + spin_unlock(&nn->client_lock);
> > +}
> > +
> > static inline int
> > hash_sessionid(struct nfs4_sessionid *sessionid)
> > {
> > @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> > }
> >
> > static struct nfs4_stid *
> > -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> > +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> > + unsigned short typemask)
> > {
> > struct nfs4_stid *s;
> >
> > @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
> > }
> > seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> > seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> > + seq_printf(m, "admin-revoked states: %d\n",
> > + atomic_read(&clp->cl_admin_revoked));
> > drop_client(clp);
> >
> > return 0;
> > @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > }
> > if (!list_empty(&clp->cl_revoked))
> > seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> > + if (atomic_read(&clp->cl_admin_revoked))
> > + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> > out_no_session:
> > if (conn)
> > free_conn(conn);
> > @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> > status = nfserr_deleg_revoked;
> > goto out;
> > }
> > + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
> > + nfs4_put_stid(&deleg->dl_stid);
> > + status = nfserr_admin_revoked;
> > + goto out;
> > + }
> > flags = share_access_to_flags(open->op_share_access);
> > status = nfs4_check_delegmode(deleg, flags);
> > if (status) {
> > @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> > case NFS4_REVOKED_DELEG_STID:
> > status = nfserr_deleg_revoked;
> > break;
> > + case NFS4_ADMIN_REVOKED_STID:
> > + case NFS4_ADMIN_REVOKED_LOCK_STID:
> > + case NFS4_ADMIN_REVOKED_DELEG_STID:
> > + status = nfserr_admin_revoked;
> > + break;
> > case NFS4_OPEN_STID:
> > case NFS4_LOCK_STID:
> > status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> > @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> >
> > __be32
> > nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > - stateid_t *stateid, unsigned char typemask,
> > + stateid_t *stateid, unsigned short typemask,
> > struct nfs4_stid **s, struct nfsd_net *nn)
> > {
> > __be32 status;
> > @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > else if (typemask & NFS4_DELEG_STID)
> > typemask |= NFS4_REVOKED_DELEG_STID;
> >
> > + if (typemask & NFS4_OPEN_STID)
> > + typemask |= NFS4_ADMIN_REVOKED_STID;
> > + if (typemask & NFS4_LOCK_STID)
> > + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
> > + if (typemask & NFS4_DELEG_STID)
> > + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
> > +
> > if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> > CLOSE_STATEID(stateid))
> > return nfserr_bad_stateid;
> > @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > return nfserr_deleg_revoked;
> > return nfserr_bad_stateid;
> > }
> > + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
> > + nfs4_put_stid(stid);
> > + return nfserr_admin_revoked;
> > + }
> > *s = stid;
> > return nfs_ok;
> > }
> > @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> > */
> > static __be32
> > nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> > - stateid_t *stateid, char typemask,
> > + stateid_t *stateid, unsigned short typemask,
> > struct nfs4_ol_stateid **stpp,
> > struct nfsd_net *nn)
> > {
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index 739ed5bf71cd..50368eec86b0 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> > * 3. Is that directory the root of an exported file system?
> > */
> > error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> > + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
> >
> > path_put(&path);
> > return error;
> > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > index f5ff42f41ee7..d46203eac3c8 100644
> > --- a/fs/nfsd/nfsd.h
> > +++ b/fs/nfsd/nfsd.h
> > @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
> > #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
> > #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
> > #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
> > +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> > #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
> > #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
> > #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index f96eaa8e9413..3af5ab55c978 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> > */
> > struct nfs4_stid {
> > refcount_t sc_count;
> > -#define NFS4_OPEN_STID 1
> > -#define NFS4_LOCK_STID 2
> > -#define NFS4_DELEG_STID 4
> > + struct list_head sc_cp_list;
> > + unsigned short sc_type;
>
> Should we just go ahead and make this a full 32-bit word? We seem to
> keep adding flags to this field, and I doubt we're saving anything by
> making this a short.
>
> > +#define NFS4_OPEN_STID BIT(0)
> > +#define NFS4_LOCK_STID BIT(1)
> > +#define NFS4_DELEG_STID BIT(2)
> > /* For an open stateid kept around *only* to process close replays: */
> > -#define NFS4_CLOSED_STID 8
> > +#define NFS4_CLOSED_STID BIT(3)
> > /* For a deleg stateid kept around only to process free_stateid's: */
> > -#define NFS4_REVOKED_DELEG_STID 16
> > -#define NFS4_CLOSED_DELEG_STID 32
> > -#define NFS4_LAYOUT_STID 64
> > - struct list_head sc_cp_list;
> > - unsigned char sc_type;
> > +#define NFS4_REVOKED_DELEG_STID BIT(4)
> > +#define NFS4_CLOSED_DELEG_STID BIT(5)
> > +#define NFS4_LAYOUT_STID BIT(6)
> > +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> > +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> > +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> > +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> > + NFS4_ADMIN_REVOKED_LOCK_STID | \
> > + NFS4_ADMIN_REVOKED_DELEG_STID)
>
> Not a specific criticism of these patches, since this problem preexists
> them, but I really dislike the way that sc_type is used as a bitmask,
> but also sort of like an enum. In some cases, we test for specific flags
> in the mask, and in other cases (e.g. states_show), we treat them as
> discrete values to feed it to a switch().
>
> Personally, I'd find this less confusing if we just treat this as a set
> of flags full-stop. We could leave the low-order bits to show the real
> type (open, lock, deleg, etc.) and just mask off the high-order bits
> when we need to feed it to a switch statement.
>
> For instance above, we're adding 3 new NFS4_ADMIN_REVOKED values, but we
> could (in theory) just have a flag in there that says NFS4_ADMIN_REVOKED
> and leave the old type bit in place instead of changing it to a new
> discrete sc_type value.

I agree.
Bits might be:
OPEN
LOCK
DELEG
LAYOUT
CLOSED (combines with OPEN or DELEG)
REVOKED (combines with DELEG)
ADMIN_REVOKED (combined with OPEN, LOCK, DELEG. Sets REVOKED when
with DELEG)

so we could go back to a char :-) Probably sensible to use unsigned int
though.

For updates the rule would be that bits can be set but never cleared so
you don't need locking to read unless you care about a bit that can be
changed.

cl_lock is held most often for updates, but not always. We should see
if we can make that more consistent. Maybe we could use sc_lock
instead.
I'll have a look.

Thanks,
NeilBrown


>
> > stateid_t sc_stateid;
> > spinlock_t sc_lock;
> > struct nfs4_client *sc_client;
> > @@ -372,6 +378,7 @@ struct nfs4_client {
> > clientid_t cl_clientid; /* generated by server */
> > nfs4_verifier cl_confirm; /* generated by server */
> > u32 cl_minorversion;
> > + atomic_t cl_admin_revoked; /* count of admin-revoked states */
> > /* NFSv4.1 client implementation id: */
> > struct xdr_netobj cl_nii_domain;
> > struct xdr_netobj cl_nii_name;
> > @@ -694,7 +701,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> > stateid_t *stateid, int flags, struct nfsd_file **filp,
> > struct nfs4_stid **cstid);
> > __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > - stateid_t *stateid, unsigned char typemask,
> > + stateid_t *stateid, unsigned short typemask,
> > struct nfs4_stid **s, struct nfsd_net *nn);
> > struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
> > void (*sc_free)(struct nfs4_stid *));
> > @@ -736,6 +743,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
> > }
> > struct nfsd_file *find_any_file(struct nfs4_file *f);
> >
> > +#ifdef CONFIG_NFSD_V4
> > +void nfsd4_revoke_states(struct net *net, struct super_block *sb);
> > +#else
> > +static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > +{
> > +}
> > +#endif
> > +
> > /* grace period management */
> > void nfsd4_end_grace(struct nfsd_net *nn);
> >
> > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> > index fbc0ccb40424..e359d531402c 100644
> > --- a/fs/nfsd/trace.h
> > +++ b/fs/nfsd/trace.h
> > @@ -648,6 +648,9 @@ TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> > TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> > TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> > TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_STID);
> > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_LOCK_STID);
> > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_DELEG_STID);
> >
> > #define show_stid_type(x) \
> > __print_flags(x, "|", \
> > @@ -657,7 +660,10 @@ TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> > { NFS4_CLOSED_STID, "CLOSED" }, \
> > { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> > { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> > - { NFS4_LAYOUT_STID, "LAYOUT" })
> > + { NFS4_LAYOUT_STID, "LAYOUT" }, \
> > + { NFS4_ADMIN_REVOKED_STID, "ADMIN_REVOKED" }, \
> > + { NFS4_ADMIN_REVOKED_LOCK_STID, "ADMIN_REVOKED_LOCK" }, \
> > + { NFS4_ADMIN_REVOKED_DELEG_STID,"ADMIN_REVOKED_DELEG" })
> >
> > DECLARE_EVENT_CLASS(nfsd_stid_class,
> > TP_PROTO(
>
> --
> Jeff Layton <[email protected]>
>

2023-11-02 20:25:03

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Fri, 2023-11-03 at 07:03 +1100, NeilBrown wrote:
> On Thu, 02 Nov 2023, Jeff Layton wrote:
> > On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> > > The NFSv4 protocol allows state to be revoked by the admin and has error
> > > codes which allow this to be communicated to the client.
> > >
> > > This patch
> > > - introduces 3 new state-id types for revoked open, lock, and
> > > delegation state. This requires the bitmask to be 'short',
> > > not 'char'
> > > - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> > > - introduces a per-client counter of these states and returns
> > > SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> > > Decrement this when freeing any admin-revoked state.
> > > - introduces stub code to find all interesting states for a given
> > > superblock so they can be revoked via the 'unlock_filesystem'
> > > file in /proc/fs/nfsd/
> > > No actual states are handled yet.
> > >
> > > Signed-off-by: NeilBrown <[email protected]>
> > > ---
> > > fs/nfsd/nfs4layouts.c | 2 +-
> > > fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> > > fs/nfsd/nfsctl.c | 1 +
> > > fs/nfsd/nfsd.h | 1 +
> > > fs/nfsd/state.h | 35 +++++++++++-----
> > > fs/nfsd/trace.h | 8 +++-
> > > 6 files changed, 120 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > > index 5e8096bc5eaa..09d0363bfbc4 100644
> > > --- a/fs/nfsd/nfs4layouts.c
> > > +++ b/fs/nfsd/nfs4layouts.c
> > > @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> > > {
> > > struct nfs4_layout_stateid *ls;
> > > struct nfs4_stid *stid;
> > > - unsigned char typemask = NFS4_LAYOUT_STID;
> > > + unsigned short typemask = NFS4_LAYOUT_STID;
> > > __be32 status;
> > >
> > > if (create)
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 65fd5510323a..f3ba53a16105 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > > return NULL;
> > > }
> > >
> > > +void nfs4_unhash_stid(struct nfs4_stid *s)
> > > +{
> > > + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
> > > + atomic_dec(&s->sc_client->cl_admin_revoked);
> > > + s->sc_type = 0;
> > > +}
> > > +
> > > void
> > > nfs4_put_stid(struct nfs4_stid *s)
> > > {
> > > @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
> > > return;
> > > }
> > > idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > > + nfs4_unhash_stid(s);
> > > nfs4_free_cpntf_statelist(clp->net, s);
> > > spin_unlock(&clp->cl_lock);
> > > s->sc_free(s);
> > > @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> > > nfs4_put_stid(&dp->dl_stid);
> > > }
> > >
> > > -void nfs4_unhash_stid(struct nfs4_stid *s)
> > > -{
> > > - s->sc_type = 0;
> > > -}
> > > -
> > > /**
> > > * nfs4_delegation_exists - Discover if this delegation already exists
> > > * @clp: a pointer to the nfs4_client we're granting a delegation to
> > > @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> > > }
> > >
> > > idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > > + nfs4_unhash_stid(s);
> > > list_add(&stp->st_locks, reaplist);
> > > }
> > >
> > > @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
> > > nfs4_put_stateowner(&oo->oo_owner);
> > > }
> > >
> > > +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> > > + struct super_block *sb,
> > > + unsigned short sc_types)
> > > +{
> > > + unsigned long id, tmp;
> > > + struct nfs4_stid *stid;
> > > +
> > > + spin_lock(&clp->cl_lock);
> > > + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> > > + if ((stid->sc_type & sc_types) &&
> > > + stid->sc_file->fi_inode->i_sb == sb) {
> > > + refcount_inc(&stid->sc_count);
> > > + break;
> > > + }
> > > + spin_unlock(&clp->cl_lock);
> > > + return stid;
> > > +}
> > > +
> > > +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > > +{
> > > + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > > + unsigned int idhashval;
> > > + unsigned short sc_types;
> > > +
> > > + sc_types = 0;
> > > +
> > > + spin_lock(&nn->client_lock);
> > > + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> > > + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> > > + struct nfs4_client *clp;
> > > + retry:
> > > + list_for_each_entry(clp, head, cl_idhash) {
> > > + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> > > + sc_types);
> > > + if (stid) {
> > > + spin_unlock(&nn->client_lock);
> > > + switch (stid->sc_type) {
> > > + }
> > > + nfs4_put_stid(stid);
> > > + spin_lock(&nn->client_lock);
> > > + goto retry;
> > > + }
> > > + }
> > > + }
> > > + spin_unlock(&nn->client_lock);
> > > +}
> > > +
> > > static inline int
> > > hash_sessionid(struct nfs4_sessionid *sessionid)
> > > {
> > > @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> > > }
> > >
> > > static struct nfs4_stid *
> > > -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> > > +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> > > + unsigned short typemask)
> > > {
> > > struct nfs4_stid *s;
> > >
> > > @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
> > > }
> > > seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> > > seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> > > + seq_printf(m, "admin-revoked states: %d\n",
> > > + atomic_read(&clp->cl_admin_revoked));
> > > drop_client(clp);
> > >
> > > return 0;
> > > @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > > }
> > > if (!list_empty(&clp->cl_revoked))
> > > seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> > > + if (atomic_read(&clp->cl_admin_revoked))
> > > + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> > > out_no_session:
> > > if (conn)
> > > free_conn(conn);
> > > @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> > > status = nfserr_deleg_revoked;
> > > goto out;
> > > }
> > > + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
> > > + nfs4_put_stid(&deleg->dl_stid);
> > > + status = nfserr_admin_revoked;
> > > + goto out;
> > > + }
> > > flags = share_access_to_flags(open->op_share_access);
> > > status = nfs4_check_delegmode(deleg, flags);
> > > if (status) {
> > > @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> > > case NFS4_REVOKED_DELEG_STID:
> > > status = nfserr_deleg_revoked;
> > > break;
> > > + case NFS4_ADMIN_REVOKED_STID:
> > > + case NFS4_ADMIN_REVOKED_LOCK_STID:
> > > + case NFS4_ADMIN_REVOKED_DELEG_STID:
> > > + status = nfserr_admin_revoked;
> > > + break;
> > > case NFS4_OPEN_STID:
> > > case NFS4_LOCK_STID:
> > > status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> > > @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> > >
> > > __be32
> > > nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > - stateid_t *stateid, unsigned char typemask,
> > > + stateid_t *stateid, unsigned short typemask,
> > > struct nfs4_stid **s, struct nfsd_net *nn)
> > > {
> > > __be32 status;
> > > @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > else if (typemask & NFS4_DELEG_STID)
> > > typemask |= NFS4_REVOKED_DELEG_STID;
> > >
> > > + if (typemask & NFS4_OPEN_STID)
> > > + typemask |= NFS4_ADMIN_REVOKED_STID;
> > > + if (typemask & NFS4_LOCK_STID)
> > > + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
> > > + if (typemask & NFS4_DELEG_STID)
> > > + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
> > > +
> > > if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> > > CLOSE_STATEID(stateid))
> > > return nfserr_bad_stateid;
> > > @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > return nfserr_deleg_revoked;
> > > return nfserr_bad_stateid;
> > > }
> > > + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
> > > + nfs4_put_stid(stid);
> > > + return nfserr_admin_revoked;
> > > + }
> > > *s = stid;
> > > return nfs_ok;
> > > }
> > > @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> > > */
> > > static __be32
> > > nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> > > - stateid_t *stateid, char typemask,
> > > + stateid_t *stateid, unsigned short typemask,
> > > struct nfs4_ol_stateid **stpp,
> > > struct nfsd_net *nn)
> > > {
> > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > > index 739ed5bf71cd..50368eec86b0 100644
> > > --- a/fs/nfsd/nfsctl.c
> > > +++ b/fs/nfsd/nfsctl.c
> > > @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> > > * 3. Is that directory the root of an exported file system?
> > > */
> > > error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> > > + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
> > >
> > > path_put(&path);
> > > return error;
> > > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > > index f5ff42f41ee7..d46203eac3c8 100644
> > > --- a/fs/nfsd/nfsd.h
> > > +++ b/fs/nfsd/nfsd.h
> > > @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
> > > #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
> > > #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
> > > #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
> > > +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> > > #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
> > > #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
> > > #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
> > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > index f96eaa8e9413..3af5ab55c978 100644
> > > --- a/fs/nfsd/state.h
> > > +++ b/fs/nfsd/state.h
> > > @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> > > */
> > > struct nfs4_stid {
> > > refcount_t sc_count;
> > > -#define NFS4_OPEN_STID 1
> > > -#define NFS4_LOCK_STID 2
> > > -#define NFS4_DELEG_STID 4
> > > + struct list_head sc_cp_list;
> > > + unsigned short sc_type;
> >
> > Should we just go ahead and make this a full 32-bit word? We seem to
> > keep adding flags to this field, and I doubt we're saving anything by
> > making this a short.
> >
> > > +#define NFS4_OPEN_STID BIT(0)
> > > +#define NFS4_LOCK_STID BIT(1)
> > > +#define NFS4_DELEG_STID BIT(2)
> > > /* For an open stateid kept around *only* to process close replays: */
> > > -#define NFS4_CLOSED_STID 8
> > > +#define NFS4_CLOSED_STID BIT(3)
> > > /* For a deleg stateid kept around only to process free_stateid's: */
> > > -#define NFS4_REVOKED_DELEG_STID 16
> > > -#define NFS4_CLOSED_DELEG_STID 32
> > > -#define NFS4_LAYOUT_STID 64
> > > - struct list_head sc_cp_list;
> > > - unsigned char sc_type;
> > > +#define NFS4_REVOKED_DELEG_STID BIT(4)
> > > +#define NFS4_CLOSED_DELEG_STID BIT(5)
> > > +#define NFS4_LAYOUT_STID BIT(6)
> > > +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> > > +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> > > +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> > > +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> > > + NFS4_ADMIN_REVOKED_LOCK_STID | \
> > > + NFS4_ADMIN_REVOKED_DELEG_STID)
> >
> > Not a specific criticism of these patches, since this problem preexists
> > them, but I really dislike the way that sc_type is used as a bitmask,
> > but also sort of like an enum. In some cases, we test for specific flags
> > in the mask, and in other cases (e.g. states_show), we treat them as
> > discrete values to feed it to a switch().
> >
> > Personally, I'd find this less confusing if we just treat this as a set
> > of flags full-stop. We could leave the low-order bits to show the real
> > type (open, lock, deleg, etc.) and just mask off the high-order bits
> > when we need to feed it to a switch statement.
> >
> > For instance above, we're adding 3 new NFS4_ADMIN_REVOKED values, but we
> > could (in theory) just have a flag in there that says NFS4_ADMIN_REVOKED
> > and leave the old type bit in place instead of changing it to a new
> > discrete sc_type value.
>
> I agree.
> Bits might be:
> OPEN
> LOCK
> DELEG
> LAYOUT
> CLOSED (combines with OPEN or DELEG)
> REVOKED (combines with DELEG)
> ADMIN_REVOKED (combined with OPEN, LOCK, DELEG. Sets REVOKED when
> with DELEG)
>
> so we could go back to a char :-) Probably sensible to use unsigned int
> though.
>

Yeah, a u32 would be best I think. It'll just fill an existing hole on
my box (x86_64), according to pahole:

unsigned char sc_type; /* 24 1 */

/* XXX 3 bytes hole, try to pack */

> For updates the rule would be that bits can be set but never cleared so
> you don't need locking to read unless you care about a bit that can be
> changed.
>

Probably for the low order OPEN/LOCK/DELEG bits, the rule should be that
they never change. We can never convert from one to another since they
come out of different slabcaches. It's probably worthwhile to leave a
few low order bits carved out for new types in the future too. e.g.
directory delegations...

Maybe we should rename the field too? How about "sc_mode" since this is
starting to resemble the i_mode field in some ways (type and permissions
squashed together).

> cl_lock is held most often for updates, but not always. We should see
> if we can make that more consistent. Maybe we could use sc_lock
> instead.
> I'll have a look.
>

Awesome.

> Thanks,
> NeilBrown
>
>
> >
> > > stateid_t sc_stateid;
> > > spinlock_t sc_lock;
> > > struct nfs4_client *sc_client;
> > > @@ -372,6 +378,7 @@ struct nfs4_client {
> > > clientid_t cl_clientid; /* generated by server */
> > > nfs4_verifier cl_confirm; /* generated by server */
> > > u32 cl_minorversion;
> > > + atomic_t cl_admin_revoked; /* count of admin-revoked states */
> > > /* NFSv4.1 client implementation id: */
> > > struct xdr_netobj cl_nii_domain;
> > > struct xdr_netobj cl_nii_name;
> > > @@ -694,7 +701,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> > > stateid_t *stateid, int flags, struct nfsd_file **filp,
> > > struct nfs4_stid **cstid);
> > > __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > - stateid_t *stateid, unsigned char typemask,
> > > + stateid_t *stateid, unsigned short typemask,
> > > struct nfs4_stid **s, struct nfsd_net *nn);
> > > struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
> > > void (*sc_free)(struct nfs4_stid *));
> > > @@ -736,6 +743,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
> > > }
> > > struct nfsd_file *find_any_file(struct nfs4_file *f);
> > >
> > > +#ifdef CONFIG_NFSD_V4
> > > +void nfsd4_revoke_states(struct net *net, struct super_block *sb);
> > > +#else
> > > +static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > > +{
> > > +}
> > > +#endif
> > > +
> > > /* grace period management */
> > > void nfsd4_end_grace(struct nfsd_net *nn);
> > >
> > > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> > > index fbc0ccb40424..e359d531402c 100644
> > > --- a/fs/nfsd/trace.h
> > > +++ b/fs/nfsd/trace.h
> > > @@ -648,6 +648,9 @@ TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> > > TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> > > TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> > > TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> > > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_STID);
> > > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_LOCK_STID);
> > > +TRACE_DEFINE_ENUM(NFS4_ADMIN_REVOKED_DELEG_STID);
> > >
> > > #define show_stid_type(x) \
> > > __print_flags(x, "|", \
> > > @@ -657,7 +660,10 @@ TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> > > { NFS4_CLOSED_STID, "CLOSED" }, \
> > > { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> > > { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> > > - { NFS4_LAYOUT_STID, "LAYOUT" })
> > > + { NFS4_LAYOUT_STID, "LAYOUT" }, \
> > > + { NFS4_ADMIN_REVOKED_STID, "ADMIN_REVOKED" }, \
> > > + { NFS4_ADMIN_REVOKED_LOCK_STID, "ADMIN_REVOKED_LOCK" }, \
> > > + { NFS4_ADMIN_REVOKED_DELEG_STID,"ADMIN_REVOKED_DELEG" })
> > >
> > > DECLARE_EVENT_CLASS(nfsd_stid_class,
> > > TP_PROTO(
> >
> > --
> > Jeff Layton <[email protected]>
> >
>

--
Jeff Layton <[email protected]>

2023-11-02 20:29:40

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state



> On Nov 2, 2023, at 1:24 PM, Jeff Layton <[email protected]> wrote:
>
> On Fri, 2023-11-03 at 07:03 +1100, NeilBrown wrote:
>> On Thu, 02 Nov 2023, Jeff Layton wrote:
>>> On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
>>>> The NFSv4 protocol allows state to be revoked by the admin and has error
>>>> codes which allow this to be communicated to the client.
>>>>
>>>> This patch
>>>> - introduces 3 new state-id types for revoked open, lock, and
>>>> delegation state. This requires the bitmask to be 'short',
>>>> not 'char'
>>>> - reports NFS4ERR_ADMIN_REVOKED when these are accessed
>>>> - introduces a per-client counter of these states and returns
>>>> SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
>>>> Decrement this when freeing any admin-revoked state.
>>>> - introduces stub code to find all interesting states for a given
>>>> superblock so they can be revoked via the 'unlock_filesystem'
>>>> file in /proc/fs/nfsd/
>>>> No actual states are handled yet.
>>>>
>>>> Signed-off-by: NeilBrown <[email protected]>
>>>> ---
>>>> fs/nfsd/nfs4layouts.c | 2 +-
>>>> fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
>>>> fs/nfsd/nfsctl.c | 1 +
>>>> fs/nfsd/nfsd.h | 1 +
>>>> fs/nfsd/state.h | 35 +++++++++++-----
>>>> fs/nfsd/trace.h | 8 +++-
>>>> 6 files changed, 120 insertions(+), 20 deletions(-)
>>>>
>>>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
>>>> index 5e8096bc5eaa..09d0363bfbc4 100644
>>>> --- a/fs/nfsd/nfs4layouts.c
>>>> +++ b/fs/nfsd/nfs4layouts.c
>>>> @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
>>>> {
>>>> struct nfs4_layout_stateid *ls;
>>>> struct nfs4_stid *stid;
>>>> - unsigned char typemask = NFS4_LAYOUT_STID;
>>>> + unsigned short typemask = NFS4_LAYOUT_STID;
>>>> __be32 status;
>>>>
>>>> if (create)
>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>> index 65fd5510323a..f3ba53a16105 100644
>>>> --- a/fs/nfsd/nfs4state.c
>>>> +++ b/fs/nfsd/nfs4state.c
>>>> @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
>>>> return NULL;
>>>> }
>>>>
>>>> +void nfs4_unhash_stid(struct nfs4_stid *s)
>>>> +{
>>>> + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
>>>> + atomic_dec(&s->sc_client->cl_admin_revoked);
>>>> + s->sc_type = 0;
>>>> +}
>>>> +
>>>> void
>>>> nfs4_put_stid(struct nfs4_stid *s)
>>>> {
>>>> @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
>>>> return;
>>>> }
>>>> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>>>> + nfs4_unhash_stid(s);
>>>> nfs4_free_cpntf_statelist(clp->net, s);
>>>> spin_unlock(&clp->cl_lock);
>>>> s->sc_free(s);
>>>> @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
>>>> nfs4_put_stid(&dp->dl_stid);
>>>> }
>>>>
>>>> -void nfs4_unhash_stid(struct nfs4_stid *s)
>>>> -{
>>>> - s->sc_type = 0;
>>>> -}
>>>> -
>>>> /**
>>>> * nfs4_delegation_exists - Discover if this delegation already exists
>>>> * @clp: a pointer to the nfs4_client we're granting a delegation to
>>>> @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
>>>> }
>>>>
>>>> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>>>> + nfs4_unhash_stid(s);
>>>> list_add(&stp->st_locks, reaplist);
>>>> }
>>>>
>>>> @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
>>>> nfs4_put_stateowner(&oo->oo_owner);
>>>> }
>>>>
>>>> +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
>>>> + struct super_block *sb,
>>>> + unsigned short sc_types)
>>>> +{
>>>> + unsigned long id, tmp;
>>>> + struct nfs4_stid *stid;
>>>> +
>>>> + spin_lock(&clp->cl_lock);
>>>> + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
>>>> + if ((stid->sc_type & sc_types) &&
>>>> + stid->sc_file->fi_inode->i_sb == sb) {
>>>> + refcount_inc(&stid->sc_count);
>>>> + break;
>>>> + }
>>>> + spin_unlock(&clp->cl_lock);
>>>> + return stid;
>>>> +}
>>>> +
>>>> +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
>>>> +{
>>>> + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>>>> + unsigned int idhashval;
>>>> + unsigned short sc_types;
>>>> +
>>>> + sc_types = 0;
>>>> +
>>>> + spin_lock(&nn->client_lock);
>>>> + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
>>>> + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
>>>> + struct nfs4_client *clp;
>>>> + retry:
>>>> + list_for_each_entry(clp, head, cl_idhash) {
>>>> + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
>>>> + sc_types);
>>>> + if (stid) {
>>>> + spin_unlock(&nn->client_lock);
>>>> + switch (stid->sc_type) {
>>>> + }
>>>> + nfs4_put_stid(stid);
>>>> + spin_lock(&nn->client_lock);
>>>> + goto retry;
>>>> + }
>>>> + }
>>>> + }
>>>> + spin_unlock(&nn->client_lock);
>>>> +}
>>>> +
>>>> static inline int
>>>> hash_sessionid(struct nfs4_sessionid *sessionid)
>>>> {
>>>> @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
>>>> }
>>>>
>>>> static struct nfs4_stid *
>>>> -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
>>>> +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
>>>> + unsigned short typemask)
>>>> {
>>>> struct nfs4_stid *s;
>>>>
>>>> @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
>>>> }
>>>> seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
>>>> seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
>>>> + seq_printf(m, "admin-revoked states: %d\n",
>>>> + atomic_read(&clp->cl_admin_revoked));
>>>> drop_client(clp);
>>>>
>>>> return 0;
>>>> @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>> }
>>>> if (!list_empty(&clp->cl_revoked))
>>>> seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
>>>> + if (atomic_read(&clp->cl_admin_revoked))
>>>> + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
>>>> out_no_session:
>>>> if (conn)
>>>> free_conn(conn);
>>>> @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
>>>> status = nfserr_deleg_revoked;
>>>> goto out;
>>>> }
>>>> + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
>>>> + nfs4_put_stid(&deleg->dl_stid);
>>>> + status = nfserr_admin_revoked;
>>>> + goto out;
>>>> + }
>>>> flags = share_access_to_flags(open->op_share_access);
>>>> status = nfs4_check_delegmode(deleg, flags);
>>>> if (status) {
>>>> @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>>>> case NFS4_REVOKED_DELEG_STID:
>>>> status = nfserr_deleg_revoked;
>>>> break;
>>>> + case NFS4_ADMIN_REVOKED_STID:
>>>> + case NFS4_ADMIN_REVOKED_LOCK_STID:
>>>> + case NFS4_ADMIN_REVOKED_DELEG_STID:
>>>> + status = nfserr_admin_revoked;
>>>> + break;
>>>> case NFS4_OPEN_STID:
>>>> case NFS4_LOCK_STID:
>>>> status = nfsd4_check_openowner_confirmed(openlockstateid(s));
>>>> @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>>>>
>>>> __be32
>>>> nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>>>> - stateid_t *stateid, unsigned char typemask,
>>>> + stateid_t *stateid, unsigned short typemask,
>>>> struct nfs4_stid **s, struct nfsd_net *nn)
>>>> {
>>>> __be32 status;
>>>> @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>>>> else if (typemask & NFS4_DELEG_STID)
>>>> typemask |= NFS4_REVOKED_DELEG_STID;
>>>>
>>>> + if (typemask & NFS4_OPEN_STID)
>>>> + typemask |= NFS4_ADMIN_REVOKED_STID;
>>>> + if (typemask & NFS4_LOCK_STID)
>>>> + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
>>>> + if (typemask & NFS4_DELEG_STID)
>>>> + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
>>>> +
>>>> if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
>>>> CLOSE_STATEID(stateid))
>>>> return nfserr_bad_stateid;
>>>> @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>>>> return nfserr_deleg_revoked;
>>>> return nfserr_bad_stateid;
>>>> }
>>>> + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
>>>> + nfs4_put_stid(stid);
>>>> + return nfserr_admin_revoked;
>>>> + }
>>>> *s = stid;
>>>> return nfs_ok;
>>>> }
>>>> @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
>>>> */
>>>> static __be32
>>>> nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
>>>> - stateid_t *stateid, char typemask,
>>>> + stateid_t *stateid, unsigned short typemask,
>>>> struct nfs4_ol_stateid **stpp,
>>>> struct nfsd_net *nn)
>>>> {
>>>> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
>>>> index 739ed5bf71cd..50368eec86b0 100644
>>>> --- a/fs/nfsd/nfsctl.c
>>>> +++ b/fs/nfsd/nfsctl.c
>>>> @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
>>>> * 3. Is that directory the root of an exported file system?
>>>> */
>>>> error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
>>>> + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
>>>>
>>>> path_put(&path);
>>>> return error;
>>>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>>>> index f5ff42f41ee7..d46203eac3c8 100644
>>>> --- a/fs/nfsd/nfsd.h
>>>> +++ b/fs/nfsd/nfsd.h
>>>> @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
>>>> #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
>>>> #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
>>>> #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
>>>> +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
>>>> #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
>>>> #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
>>>> #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
>>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>>> index f96eaa8e9413..3af5ab55c978 100644
>>>> --- a/fs/nfsd/state.h
>>>> +++ b/fs/nfsd/state.h
>>>> @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
>>>> */
>>>> struct nfs4_stid {
>>>> refcount_t sc_count;
>>>> -#define NFS4_OPEN_STID 1
>>>> -#define NFS4_LOCK_STID 2
>>>> -#define NFS4_DELEG_STID 4
>>>> + struct list_head sc_cp_list;
>>>> + unsigned short sc_type;
>>>
>>> Should we just go ahead and make this a full 32-bit word? We seem to
>>> keep adding flags to this field, and I doubt we're saving anything by
>>> making this a short.
>>>
>>>> +#define NFS4_OPEN_STID BIT(0)
>>>> +#define NFS4_LOCK_STID BIT(1)
>>>> +#define NFS4_DELEG_STID BIT(2)
>>>> /* For an open stateid kept around *only* to process close replays: */
>>>> -#define NFS4_CLOSED_STID 8
>>>> +#define NFS4_CLOSED_STID BIT(3)
>>>> /* For a deleg stateid kept around only to process free_stateid's: */
>>>> -#define NFS4_REVOKED_DELEG_STID 16
>>>> -#define NFS4_CLOSED_DELEG_STID 32
>>>> -#define NFS4_LAYOUT_STID 64
>>>> - struct list_head sc_cp_list;
>>>> - unsigned char sc_type;
>>>> +#define NFS4_REVOKED_DELEG_STID BIT(4)
>>>> +#define NFS4_CLOSED_DELEG_STID BIT(5)
>>>> +#define NFS4_LAYOUT_STID BIT(6)
>>>> +#define NFS4_ADMIN_REVOKED_STID BIT(7)
>>>> +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
>>>> +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
>>>> +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
>>>> + NFS4_ADMIN_REVOKED_LOCK_STID | \
>>>> + NFS4_ADMIN_REVOKED_DELEG_STID)
>>>
>>> Not a specific criticism of these patches, since this problem preexists
>>> them, but I really dislike the way that sc_type is used as a bitmask,
>>> but also sort of like an enum. In some cases, we test for specific flags
>>> in the mask, and in other cases (e.g. states_show), we treat them as
>>> discrete values to feed it to a switch().
>>>
>>> Personally, I'd find this less confusing if we just treat this as a set
>>> of flags full-stop. We could leave the low-order bits to show the real
>>> type (open, lock, deleg, etc.) and just mask off the high-order bits
>>> when we need to feed it to a switch statement.
>>>
>>> For instance above, we're adding 3 new NFS4_ADMIN_REVOKED values, but we
>>> could (in theory) just have a flag in there that says NFS4_ADMIN_REVOKED
>>> and leave the old type bit in place instead of changing it to a new
>>> discrete sc_type value.
>>
>> I agree.
>> Bits might be:
>> OPEN
>> LOCK
>> DELEG
>> LAYOUT
>> CLOSED (combines with OPEN or DELEG)
>> REVOKED (combines with DELEG)
>> ADMIN_REVOKED (combined with OPEN, LOCK, DELEG. Sets REVOKED when
>> with DELEG)
>>
>> so we could go back to a char :-) Probably sensible to use unsigned int
>> though.
>>
>
> Yeah, a u32 would be best I think. It'll just fill an existing hole on
> my box (x86_64), according to pahole:
>
> unsigned char sc_type; /* 24 1 */
>
> /* XXX 3 bytes hole, try to pack */
>
>> For updates the rule would be that bits can be set but never cleared so
>> you don't need locking to read unless you care about a bit that can be
>> changed.
>>
>
> Probably for the low order OPEN/LOCK/DELEG bits, the rule should be that
> they never change. We can never convert from one to another since they
> come out of different slabcaches. It's probably worthwhile to leave a
> few low order bits carved out for new types in the future too. e.g.
> directory delegations...
>
> Maybe we should rename the field too? How about "sc_mode" since this is
> starting to resemble the i_mode field in some ways (type and permissions
> squashed together).

In that case, two separate u16 fields might be better.


--
Chuck Lever


2023-11-03 01:08:35

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Fri, 03 Nov 2023, Chuck Lever III wrote:
>
> > On Nov 2, 2023, at 1:24 PM, Jeff Layton <[email protected]> wrote:
> >
> > On Fri, 2023-11-03 at 07:03 +1100, NeilBrown wrote:
> >> On Thu, 02 Nov 2023, Jeff Layton wrote:
> >>> On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> >>>> The NFSv4 protocol allows state to be revoked by the admin and has error
> >>>> codes which allow this to be communicated to the client.
> >>>>
> >>>> This patch
> >>>> - introduces 3 new state-id types for revoked open, lock, and
> >>>> delegation state. This requires the bitmask to be 'short',
> >>>> not 'char'
> >>>> - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> >>>> - introduces a per-client counter of these states and returns
> >>>> SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> >>>> Decrement this when freeing any admin-revoked state.
> >>>> - introduces stub code to find all interesting states for a given
> >>>> superblock so they can be revoked via the 'unlock_filesystem'
> >>>> file in /proc/fs/nfsd/
> >>>> No actual states are handled yet.
> >>>>
> >>>> Signed-off-by: NeilBrown <[email protected]>
> >>>> ---
> >>>> fs/nfsd/nfs4layouts.c | 2 +-
> >>>> fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> >>>> fs/nfsd/nfsctl.c | 1 +
> >>>> fs/nfsd/nfsd.h | 1 +
> >>>> fs/nfsd/state.h | 35 +++++++++++-----
> >>>> fs/nfsd/trace.h | 8 +++-
> >>>> 6 files changed, 120 insertions(+), 20 deletions(-)
> >>>>
> >>>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> >>>> index 5e8096bc5eaa..09d0363bfbc4 100644
> >>>> --- a/fs/nfsd/nfs4layouts.c
> >>>> +++ b/fs/nfsd/nfs4layouts.c
> >>>> @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> >>>> {
> >>>> struct nfs4_layout_stateid *ls;
> >>>> struct nfs4_stid *stid;
> >>>> - unsigned char typemask = NFS4_LAYOUT_STID;
> >>>> + unsigned short typemask = NFS4_LAYOUT_STID;
> >>>> __be32 status;
> >>>>
> >>>> if (create)
> >>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >>>> index 65fd5510323a..f3ba53a16105 100644
> >>>> --- a/fs/nfsd/nfs4state.c
> >>>> +++ b/fs/nfsd/nfs4state.c
> >>>> @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> >>>> return NULL;
> >>>> }
> >>>>
> >>>> +void nfs4_unhash_stid(struct nfs4_stid *s)
> >>>> +{
> >>>> + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
> >>>> + atomic_dec(&s->sc_client->cl_admin_revoked);
> >>>> + s->sc_type = 0;
> >>>> +}
> >>>> +
> >>>> void
> >>>> nfs4_put_stid(struct nfs4_stid *s)
> >>>> {
> >>>> @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
> >>>> return;
> >>>> }
> >>>> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> >>>> + nfs4_unhash_stid(s);
> >>>> nfs4_free_cpntf_statelist(clp->net, s);
> >>>> spin_unlock(&clp->cl_lock);
> >>>> s->sc_free(s);
> >>>> @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> >>>> nfs4_put_stid(&dp->dl_stid);
> >>>> }
> >>>>
> >>>> -void nfs4_unhash_stid(struct nfs4_stid *s)
> >>>> -{
> >>>> - s->sc_type = 0;
> >>>> -}
> >>>> -
> >>>> /**
> >>>> * nfs4_delegation_exists - Discover if this delegation already exists
> >>>> * @clp: a pointer to the nfs4_client we're granting a delegation to
> >>>> @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> >>>> }
> >>>>
> >>>> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> >>>> + nfs4_unhash_stid(s);
> >>>> list_add(&stp->st_locks, reaplist);
> >>>> }
> >>>>
> >>>> @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
> >>>> nfs4_put_stateowner(&oo->oo_owner);
> >>>> }
> >>>>
> >>>> +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> >>>> + struct super_block *sb,
> >>>> + unsigned short sc_types)
> >>>> +{
> >>>> + unsigned long id, tmp;
> >>>> + struct nfs4_stid *stid;
> >>>> +
> >>>> + spin_lock(&clp->cl_lock);
> >>>> + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> >>>> + if ((stid->sc_type & sc_types) &&
> >>>> + stid->sc_file->fi_inode->i_sb == sb) {
> >>>> + refcount_inc(&stid->sc_count);
> >>>> + break;
> >>>> + }
> >>>> + spin_unlock(&clp->cl_lock);
> >>>> + return stid;
> >>>> +}
> >>>> +
> >>>> +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> >>>> +{
> >>>> + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> >>>> + unsigned int idhashval;
> >>>> + unsigned short sc_types;
> >>>> +
> >>>> + sc_types = 0;
> >>>> +
> >>>> + spin_lock(&nn->client_lock);
> >>>> + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> >>>> + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> >>>> + struct nfs4_client *clp;
> >>>> + retry:
> >>>> + list_for_each_entry(clp, head, cl_idhash) {
> >>>> + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> >>>> + sc_types);
> >>>> + if (stid) {
> >>>> + spin_unlock(&nn->client_lock);
> >>>> + switch (stid->sc_type) {
> >>>> + }
> >>>> + nfs4_put_stid(stid);
> >>>> + spin_lock(&nn->client_lock);
> >>>> + goto retry;
> >>>> + }
> >>>> + }
> >>>> + }
> >>>> + spin_unlock(&nn->client_lock);
> >>>> +}
> >>>> +
> >>>> static inline int
> >>>> hash_sessionid(struct nfs4_sessionid *sessionid)
> >>>> {
> >>>> @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> >>>> }
> >>>>
> >>>> static struct nfs4_stid *
> >>>> -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> >>>> +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> >>>> + unsigned short typemask)
> >>>> {
> >>>> struct nfs4_stid *s;
> >>>>
> >>>> @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
> >>>> }
> >>>> seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> >>>> seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> >>>> + seq_printf(m, "admin-revoked states: %d\n",
> >>>> + atomic_read(&clp->cl_admin_revoked));
> >>>> drop_client(clp);
> >>>>
> >>>> return 0;
> >>>> @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>> }
> >>>> if (!list_empty(&clp->cl_revoked))
> >>>> seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> >>>> + if (atomic_read(&clp->cl_admin_revoked))
> >>>> + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> >>>> out_no_session:
> >>>> if (conn)
> >>>> free_conn(conn);
> >>>> @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> >>>> status = nfserr_deleg_revoked;
> >>>> goto out;
> >>>> }
> >>>> + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
> >>>> + nfs4_put_stid(&deleg->dl_stid);
> >>>> + status = nfserr_admin_revoked;
> >>>> + goto out;
> >>>> + }
> >>>> flags = share_access_to_flags(open->op_share_access);
> >>>> status = nfs4_check_delegmode(deleg, flags);
> >>>> if (status) {
> >>>> @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> >>>> case NFS4_REVOKED_DELEG_STID:
> >>>> status = nfserr_deleg_revoked;
> >>>> break;
> >>>> + case NFS4_ADMIN_REVOKED_STID:
> >>>> + case NFS4_ADMIN_REVOKED_LOCK_STID:
> >>>> + case NFS4_ADMIN_REVOKED_DELEG_STID:
> >>>> + status = nfserr_admin_revoked;
> >>>> + break;
> >>>> case NFS4_OPEN_STID:
> >>>> case NFS4_LOCK_STID:
> >>>> status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> >>>> @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> >>>>
> >>>> __be32
> >>>> nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> >>>> - stateid_t *stateid, unsigned char typemask,
> >>>> + stateid_t *stateid, unsigned short typemask,
> >>>> struct nfs4_stid **s, struct nfsd_net *nn)
> >>>> {
> >>>> __be32 status;
> >>>> @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> >>>> else if (typemask & NFS4_DELEG_STID)
> >>>> typemask |= NFS4_REVOKED_DELEG_STID;
> >>>>
> >>>> + if (typemask & NFS4_OPEN_STID)
> >>>> + typemask |= NFS4_ADMIN_REVOKED_STID;
> >>>> + if (typemask & NFS4_LOCK_STID)
> >>>> + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
> >>>> + if (typemask & NFS4_DELEG_STID)
> >>>> + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
> >>>> +
> >>>> if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> >>>> CLOSE_STATEID(stateid))
> >>>> return nfserr_bad_stateid;
> >>>> @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> >>>> return nfserr_deleg_revoked;
> >>>> return nfserr_bad_stateid;
> >>>> }
> >>>> + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
> >>>> + nfs4_put_stid(stid);
> >>>> + return nfserr_admin_revoked;
> >>>> + }
> >>>> *s = stid;
> >>>> return nfs_ok;
> >>>> }
> >>>> @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> >>>> */
> >>>> static __be32
> >>>> nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> >>>> - stateid_t *stateid, char typemask,
> >>>> + stateid_t *stateid, unsigned short typemask,
> >>>> struct nfs4_ol_stateid **stpp,
> >>>> struct nfsd_net *nn)
> >>>> {
> >>>> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> >>>> index 739ed5bf71cd..50368eec86b0 100644
> >>>> --- a/fs/nfsd/nfsctl.c
> >>>> +++ b/fs/nfsd/nfsctl.c
> >>>> @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> >>>> * 3. Is that directory the root of an exported file system?
> >>>> */
> >>>> error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> >>>> + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
> >>>>
> >>>> path_put(&path);
> >>>> return error;
> >>>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> >>>> index f5ff42f41ee7..d46203eac3c8 100644
> >>>> --- a/fs/nfsd/nfsd.h
> >>>> +++ b/fs/nfsd/nfsd.h
> >>>> @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
> >>>> #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
> >>>> #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
> >>>> #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
> >>>> +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> >>>> #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
> >>>> #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
> >>>> #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
> >>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> >>>> index f96eaa8e9413..3af5ab55c978 100644
> >>>> --- a/fs/nfsd/state.h
> >>>> +++ b/fs/nfsd/state.h
> >>>> @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> >>>> */
> >>>> struct nfs4_stid {
> >>>> refcount_t sc_count;
> >>>> -#define NFS4_OPEN_STID 1
> >>>> -#define NFS4_LOCK_STID 2
> >>>> -#define NFS4_DELEG_STID 4
> >>>> + struct list_head sc_cp_list;
> >>>> + unsigned short sc_type;
> >>>
> >>> Should we just go ahead and make this a full 32-bit word? We seem to
> >>> keep adding flags to this field, and I doubt we're saving anything by
> >>> making this a short.
> >>>
> >>>> +#define NFS4_OPEN_STID BIT(0)
> >>>> +#define NFS4_LOCK_STID BIT(1)
> >>>> +#define NFS4_DELEG_STID BIT(2)
> >>>> /* For an open stateid kept around *only* to process close replays: */
> >>>> -#define NFS4_CLOSED_STID 8
> >>>> +#define NFS4_CLOSED_STID BIT(3)
> >>>> /* For a deleg stateid kept around only to process free_stateid's: */
> >>>> -#define NFS4_REVOKED_DELEG_STID 16
> >>>> -#define NFS4_CLOSED_DELEG_STID 32
> >>>> -#define NFS4_LAYOUT_STID 64
> >>>> - struct list_head sc_cp_list;
> >>>> - unsigned char sc_type;
> >>>> +#define NFS4_REVOKED_DELEG_STID BIT(4)
> >>>> +#define NFS4_CLOSED_DELEG_STID BIT(5)
> >>>> +#define NFS4_LAYOUT_STID BIT(6)
> >>>> +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> >>>> +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> >>>> +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> >>>> +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> >>>> + NFS4_ADMIN_REVOKED_LOCK_STID | \
> >>>> + NFS4_ADMIN_REVOKED_DELEG_STID)
> >>>
> >>> Not a specific criticism of these patches, since this problem preexists
> >>> them, but I really dislike the way that sc_type is used as a bitmask,
> >>> but also sort of like an enum. In some cases, we test for specific flags
> >>> in the mask, and in other cases (e.g. states_show), we treat them as
> >>> discrete values to feed it to a switch().
> >>>
> >>> Personally, I'd find this less confusing if we just treat this as a set
> >>> of flags full-stop. We could leave the low-order bits to show the real
> >>> type (open, lock, deleg, etc.) and just mask off the high-order bits
> >>> when we need to feed it to a switch statement.
> >>>
> >>> For instance above, we're adding 3 new NFS4_ADMIN_REVOKED values, but we
> >>> could (in theory) just have a flag in there that says NFS4_ADMIN_REVOKED
> >>> and leave the old type bit in place instead of changing it to a new
> >>> discrete sc_type value.
> >>
> >> I agree.
> >> Bits might be:
> >> OPEN
> >> LOCK
> >> DELEG
> >> LAYOUT
> >> CLOSED (combines with OPEN or DELEG)
> >> REVOKED (combines with DELEG)
> >> ADMIN_REVOKED (combined with OPEN, LOCK, DELEG. Sets REVOKED when
> >> with DELEG)
> >>
> >> so we could go back to a char :-) Probably sensible to use unsigned int
> >> though.
> >>
> >
> > Yeah, a u32 would be best I think. It'll just fill an existing hole on
> > my box (x86_64), according to pahole:
> >
> > unsigned char sc_type; /* 24 1 */
> >
> > /* XXX 3 bytes hole, try to pack */
> >
> >> For updates the rule would be that bits can be set but never cleared so
> >> you don't need locking to read unless you care about a bit that can be
> >> changed.
> >>
> >
> > Probably for the low order OPEN/LOCK/DELEG bits, the rule should be that
> > they never change. We can never convert from one to another since they
> > come out of different slabcaches. It's probably worthwhile to leave a
> > few low order bits carved out for new types in the future too. e.g.
> > directory delegations...
> >
> > Maybe we should rename the field too? How about "sc_mode" since this is
> > starting to resemble the i_mode field in some ways (type and permissions
> > squashed together).
>
> In that case, two separate u16 fields might be better.
>

Here is a first attempt. It compiles but I haven't tried running or
thought about what testing would be appropriate.
I'll be working on other things next week but I hope to pick this up
again the following week and process any feedback, then see how my
admin-revoke patch set fits on this new code.

All comments most welcome.

Thanks,
NeilBrown


From: NeilBrown <[email protected]>
Date: Fri, 3 Nov 2023 11:43:55 +1100
Subject: [PATCH] nfsd: tidy up sc_type

sc_type identifies the type of a state - open, lock, deleg, layout - and
also the status of a state - closed or revoked.

This is a bit untidy and could get worse when "admin-revoked" states are
added. So try to clean it up.

The type is now all that is stored in sc_type. This is zero when the
state is first added to the idr (causing it to be ignored), and if then
set appropriately once it is fully initialised. It is set under
->cl_lock to ensure atomicity w.r.t lookup. It is (now) never cleared.

This is still a bit-set even though at most one bit is set. This allows
lookup functions to be given a bitmap of acceptable types.

cl_type is now an unsigned short rather than char. There is no value in
restricting to just 8 bits.

The status is stored in a separate short named "cl_status". It has two
flags: NFS4_STID_CLOSED and NFS4_STID_REVOKED.
CLOSED combines NFS4_CLOSED_STID, NFS4_CLOSED_DELEG_STID, and is used
for LOCK_STID instead of setting the sc_type to zero.
These flags are only ever set, never cleared.
For deleg stateids they are set under the global state_lock.
For open and lock stateids they are set under ->cl_lock.

Other changes here - some of which could be split out...

- When a delegation is revoked, the type was previously set to
NFS4_CLOSED_DELEG_STID and then NFS4_REVOKED_DELEG_STID.
This might open a race window. That window no longer exists.

- NFS4_STID_REVOKED (previously NFS4_REVOKED_DELEG_STID) is only set
for ->minorversion>0, so I removed all the tests on minorversion when
that status has been detected.

- nfs4_unhash_stid() has been removed, and we never set sc_type = 0.
This was only used for LOCK stids and they now use NFS4_STID_CLOSED

- ->cl_lock is now hel when hash_delegation_locked() is called, so
that the locking rules for setting ->sc_type are followed.

Signed-off-by: NeilBrown <[email protected]>
---
fs/nfsd/nfs4layouts.c | 4 +-
fs/nfsd/nfs4state.c | 165 ++++++++++++++++++++----------------------
fs/nfsd/state.h | 37 +++++++---
fs/nfsd/trace.h | 25 +++----
4 files changed, 119 insertions(+), 112 deletions(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 5e8096bc5eaa..678bef3a7f15 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -269,13 +269,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
{
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stid;
- unsigned char typemask = NFS4_LAYOUT_STID;
+ unsigned short typemask = NFS4_LAYOUT_STID;
__be32 status;

if (create)
typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);

- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
+ status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
net_generic(SVC_NET(rqstp), nfsd_net_id));
if (status)
goto out;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 65fd5510323a..551a86a796ed 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1265,11 +1265,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
nfs4_put_stid(&dp->dl_stid);
}

-void nfs4_unhash_stid(struct nfs4_stid *s)
-{
- s->sc_type = 0;
-}
-
/**
* nfs4_delegation_exists - Discover if this delegation already exists
* @clp: a pointer to the nfs4_client we're granting a delegation to
@@ -1317,6 +1312,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)

lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
+ lockdep_assert_held(&clp->cl_lock);

if (nfs4_delegation_exists(clp, fp))
return -EAGAIN;
@@ -1333,7 +1329,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp)
}

static bool
-unhash_delegation_locked(struct nfs4_delegation *dp)
+unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short state)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;

@@ -1342,7 +1338,10 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
if (!delegation_hashed(dp))
return false;

- dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
+ if (dp->dl_stid.sc_client->cl_minorversion == 0)
+ state = NFS4_STID_CLOSED;
+ dp->dl_stid.sc_status |= state | NFS4_STID_CLOSED;
+
/* Ensure that deleg break won't try to requeue it */
++dp->dl_time;
spin_lock(&fp->fi_lock);
@@ -1358,7 +1357,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
bool unhashed;

spin_lock(&state_lock);
- unhashed = unhash_delegation_locked(dp);
+ unhashed = unhash_delegation_locked(dp, NFS4_STID_CLOSED);
spin_unlock(&state_lock);
if (unhashed)
destroy_unhashed_deleg(dp);
@@ -1372,9 +1371,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)

trace_nfsd_stid_revoke(&dp->dl_stid);

- if (clp->cl_minorversion) {
+ if (dp->dl_stid.sc_status & NFS4_STID_REVOKED) {
spin_lock(&clp->cl_lock);
- dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
refcount_inc(&dp->dl_stid.sc_count);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock);
@@ -1382,8 +1380,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
destroy_unhashed_deleg(dp);
}

-/*
- * SETCLIENTID state
+/*
+ * SETCLIENTID state
*/

static unsigned int clientid_hashval(u32 id)
@@ -1546,7 +1544,6 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
if (!unhash_ol_stateid(stp))
return false;
list_del_init(&stp->st_locks);
- nfs4_unhash_stid(&stp->st_stid);
return true;
}

@@ -1557,6 +1554,7 @@ static void release_lock_stateid(struct nfs4_ol_stateid *stp)

spin_lock(&clp->cl_lock);
unhashed = unhash_lock_stateid(stp);
+ stp->st_stid.sc_status |= NFS4_STID_CLOSED;
spin_unlock(&clp->cl_lock);
if (unhashed)
nfs4_put_stid(&stp->st_stid);
@@ -1625,6 +1623,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
LIST_HEAD(reaplist);

spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= NFS4_STID_CLOSED;
if (unhash_open_stateid(stp, &reaplist))
put_ol_stateid_locked(stp, &reaplist);
spin_unlock(&stp->st_stid.sc_client->cl_lock);
@@ -2233,7 +2232,7 @@ __destroy_client(struct nfs4_client *clp)
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- WARN_ON(!unhash_delegation_locked(dp));
+ WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_CLOSED));
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -2465,14 +2464,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
}

static struct nfs4_stid *
-find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
+ unsigned short typemask, unsigned short ok_states)
{
struct nfs4_stid *s;

spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, t);
if (s != NULL) {
- if (typemask & s->sc_type)
+ if ((s->sc_status & ~ok_states) == 0 &&
+ (typemask & s->sc_type))
refcount_inc(&s->sc_count);
else
s = NULL;
@@ -4582,7 +4583,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
continue;
if (local->st_stateowner != &oo->oo_owner)
continue;
- if (local->st_stid.sc_type == NFS4_OPEN_STID) {
+ if (local->st_stid.sc_type == NFS4_OPEN_STID &&
+ !(local->st_stid.sc_status & NFS4_STID_CLOSED)) {
ret = local;
refcount_inc(&ret->st_stid.sc_count);
break;
@@ -4596,17 +4598,10 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
{
__be32 ret = nfs_ok;

- switch (s->sc_type) {
- default:
- break;
- case 0:
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
- ret = nfserr_bad_stateid;
- break;
- case NFS4_REVOKED_DELEG_STID:
+ if (s->sc_status & NFS4_STID_REVOKED)
ret = nfserr_deleg_revoked;
- }
+ else if (s->sc_status & NFS4_STID_CLOSED)
+ ret = nfserr_bad_stateid;
return ret;
}

@@ -4919,9 +4914,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,

trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);

- if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
- dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
- return 1;
+ if (dp->dl_stid.sc_status)
+ /* CLOSED or REVOKED */
+ return 1;

switch (task->tk_status) {
case 0:
@@ -5170,8 +5165,7 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
{
struct nfs4_stid *ret;

- ret = find_stateid_by_type(cl, s,
- NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
+ ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID, 0);
if (!ret)
return NULL;
return delegstateid(ret);
@@ -5194,10 +5188,9 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
if (deleg == NULL)
goto out;
- if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+ if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
nfs4_put_stid(&deleg->dl_stid);
- if (cl->cl_minorversion)
- status = nfserr_deleg_revoked;
+ status = nfserr_deleg_revoked;
goto out;
}
flags = share_access_to_flags(open->op_share_access);
@@ -5609,12 +5602,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
goto out_unlock;

spin_lock(&state_lock);
+ spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock);
if (fp->fi_had_conflict)
status = -EAGAIN;
else
status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
+ spin_unlock(&clp->cl_lock);
spin_unlock(&state_lock);

if (status)
@@ -5840,7 +5835,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
} else {
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
if (status) {
- stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
goto out;
@@ -6232,7 +6226,7 @@ nfs4_laundromat(struct nfsd_net *nn)
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (!state_expired(&lt, dp->dl_time))
break;
- WARN_ON(!unhash_delegation_locked(dp));
+ WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_REVOKED));
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -6471,22 +6465,20 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
if (status)
goto out_unlock;
+ status = nfsd4_verify_open_stid(s);
+ if (status)
+ goto out_unlock;
+
switch (s->sc_type) {
case NFS4_DELEG_STID:
status = nfs_ok;
break;
- case NFS4_REVOKED_DELEG_STID:
- status = nfserr_deleg_revoked;
- break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
status = nfsd4_check_openowner_confirmed(openlockstateid(s));
break;
default:
printk("unknown stateid type %x\n", s->sc_type);
- fallthrough;
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
status = nfserr_bad_stateid;
}
out_unlock:
@@ -6496,7 +6488,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)

__be32
nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statemask,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
@@ -6507,10 +6500,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
* only return revoked delegations if explicitly asked.
* otherwise we report revoked or bad_stateid status.
*/
- if (typemask & NFS4_REVOKED_DELEG_STID)
+ if (statemask & NFS4_STID_REVOKED)
return_revoked = true;
- else if (typemask & NFS4_DELEG_STID)
- typemask |= NFS4_REVOKED_DELEG_STID;

if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
@@ -6523,14 +6514,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
}
if (status)
return status;
- stid = find_stateid_by_type(cstate->clp, stateid, typemask);
+ stid = find_stateid_by_type(cstate->clp, stateid,
+ typemask, statemask & NFS4_STID_CLOSED);
if (!stid)
return nfserr_bad_stateid;
- if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+ if ((stid->sc_status & NFS4_STID_REVOKED) && !return_revoked) {
nfs4_put_stid(stid);
- if (cstate->minorversion)
- return nfserr_deleg_revoked;
- return nfserr_bad_stateid;
+ return nfserr_deleg_revoked;
}
*s = stid;
return nfs_ok;
@@ -6541,7 +6531,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
{
struct nfsd_file *ret = NULL;

- if (!s)
+ if (!s || s->sc_status)
return NULL;

switch (s->sc_type) {
@@ -6664,7 +6654,8 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
goto out;

*stid = find_stateid_by_type(found, &cps->cp_p_stateid,
- NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID);
+ NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+ 0);
if (*stid)
status = nfs_ok;
else
@@ -6722,7 +6713,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,

status = nfsd4_lookup_stateid(cstate, stateid,
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
- &s, nn);
+ 0, &s, nn);
if (status == nfserr_bad_stateid)
status = find_cpntf_state(nn, stateid, &s);
if (status)
@@ -6740,9 +6731,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
case NFS4_LOCK_STID:
status = nfs4_check_olstateid(openlockstateid(s), flags);
break;
- default:
- status = nfserr_bad_stateid;
- break;
}
if (status)
goto out;
@@ -6821,11 +6809,20 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,

spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid);
- if (!s)
+ if (!s || s->sc_status & NFS4_STID_CLOSED)
goto out_unlock;
spin_lock(&s->sc_lock);
switch (s->sc_type) {
case NFS4_DELEG_STID:
+ if (s->sc_status & NFS4_STID_REVOKED) {
+ spin_unlock(&s->sc_lock);
+ dp = delegstateid(s);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ ret = nfs_ok;
+ goto out;
+ }
ret = nfserr_locks_held;
break;
case NFS4_OPEN_STID:
@@ -6840,14 +6837,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_unlock(&cl->cl_lock);
ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
- case NFS4_REVOKED_DELEG_STID:
- spin_unlock(&s->sc_lock);
- dp = delegstateid(s);
- list_del_init(&dp->dl_recall_lru);
- spin_unlock(&cl->cl_lock);
- nfs4_put_stid(s);
- ret = nfs_ok;
- goto out;
/* Default falls through and returns nfserr_bad_stateid */
}
spin_unlock(&s->sc_lock);
@@ -6890,6 +6879,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* @seqid: seqid (provided by client)
* @stateid: stateid (provided by client)
* @typemask: mask of allowable types for this operation
+ * @statemask: mask of allowed states: 0 or STID_CLOSED
* @stpp: return pointer for the stateid found
* @nn: net namespace for request
*
@@ -6899,7 +6889,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
*/
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
- stateid_t *stateid, char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statemask,
struct nfs4_ol_stateid **stpp,
struct nfsd_net *nn)
{
@@ -6910,7 +6901,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
trace_nfsd_preprocess(seqid, stateid);

*stpp = NULL;
- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid,
+ typemask, statemask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
@@ -6932,7 +6924,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
struct nfs4_ol_stateid *stp;

status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
- NFS4_OPEN_STID, &stp, nn);
+ NFS4_OPEN_STID, 0, &stp, nn);
if (status)
return status;
oo = openowner(stp->st_stateowner);
@@ -6963,8 +6955,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;

status = nfs4_preprocess_seqid_op(cstate,
- oc->oc_seqid, &oc->oc_req_stateid,
- NFS4_OPEN_STID, &stp, nn);
+ oc->oc_seqid, &oc->oc_req_stateid,
+ NFS4_OPEN_STID, 0, &stp, nn);
if (status)
goto out;
oo = openowner(stp->st_stateowner);
@@ -7094,18 +7086,20 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);

- dprintk("NFSD: nfsd4_close on file %pd\n",
+ dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);

status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
- &close->cl_stateid,
- NFS4_OPEN_STID|NFS4_CLOSED_STID,
- &stp, nn);
+ &close->cl_stateid,
+ NFS4_OPEN_STID, NFS4_STID_CLOSED,
+ &stp, nn);
nfsd4_bump_seqid(cstate, status);
if (status)
- goto out;
+ goto out;

- stp->st_stid.sc_type = NFS4_CLOSED_STID;
+ spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= NFS4_STID_CLOSED;
+ spin_unlock(&stp->st_stid.sc_client->cl_lock);

/*
* Technically we don't _really_ have to increment or copy it, since
@@ -7147,7 +7141,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
return status;

- status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, 0, &s, nn);
if (status)
goto out;
dp = delegstateid(s);
@@ -7601,9 +7595,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&lock_stp, &new);
} else {
status = nfs4_preprocess_seqid_op(cstate,
- lock->lk_old_lock_seqid,
- &lock->lk_old_lock_stateid,
- NFS4_LOCK_STID, &lock_stp, nn);
+ lock->lk_old_lock_seqid,
+ &lock->lk_old_lock_stateid,
+ NFS4_LOCK_STID, 0, &lock_stp,
+ nn);
}
if (status)
goto out;
@@ -7916,8 +7911,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;

status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
- &locku->lu_stateid, NFS4_LOCK_STID,
- &stp, nn);
+ &locku->lu_stateid, NFS4_LOCK_STID, 0,
+ &stp, nn);
if (status)
goto out;
nf = find_any_file(stp->st_stid.sc_file);
@@ -8347,7 +8342,7 @@ nfs4_state_shutdown_net(struct net *net)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- WARN_ON(!unhash_delegation_locked(dp));
+ WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_CLOSED));
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index f96eaa8e9413..cf89fb6be9e1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -88,17 +88,31 @@ struct nfsd4_callback_ops {
*/
struct nfs4_stid {
refcount_t sc_count;
-#define NFS4_OPEN_STID 1
-#define NFS4_LOCK_STID 2
-#define NFS4_DELEG_STID 4
-/* For an open stateid kept around *only* to process close replays: */
-#define NFS4_CLOSED_STID 8
+
+ /* A new stateid is added to the idr early before it is
+ * fully initialised. Its sc_type is then zero.
+ * After initialisation the sc_type it set under cl_lock,
+ * and then never changes.
+ */
+#define NFS4_OPEN_STID BIT(0)
+#define NFS4_LOCK_STID BIT(1)
+#define NFS4_DELEG_STID BIT(2)
+#define NFS4_LAYOUT_STID BIT(3)
+ unsigned short sc_type;
+/* state_lock protects sc_status for delegation stateids.
+ * ->cl_lock protects sc_status for open and lock stateids.
+ * ->st_mutex also protect sc_status for open stateids.
+ */
+/*
+ * For an open stateid kept around *only* to process close replays.
+ * For deleg stateid, kept in idr until last reference is dropped.
+ */
+#define NFS4_STID_CLOSED BIT(0)
/* For a deleg stateid kept around only to process free_stateid's: */
-#define NFS4_REVOKED_DELEG_STID 16
-#define NFS4_CLOSED_DELEG_STID 32
-#define NFS4_LAYOUT_STID 64
+#define NFS4_STID_REVOKED BIT(1)
+ unsigned short sc_status;
+
struct list_head sc_cp_list;
- unsigned char sc_type;
stateid_t sc_stateid;
spinlock_t sc_lock;
struct nfs4_client *sc_client;
@@ -694,8 +708,9 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **filp,
struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
- struct nfs4_stid **s, struct nfsd_net *nn);
+ stateid_t *stateid, unsigned short typemask,
+ unsigned short statemask,
+ struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index fbc0ccb40424..668b352faaea 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -641,31 +641,26 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
DEFINE_STATESEQID_EVENT(preprocess);
DEFINE_STATESEQID_EVENT(open_confirm);

-TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
-TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
-TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
-TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
-
#define show_stid_type(x) \
__print_flags(x, "|", \
{ NFS4_OPEN_STID, "OPEN" }, \
{ NFS4_LOCK_STID, "LOCK" }, \
{ NFS4_DELEG_STID, "DELEG" }, \
- { NFS4_CLOSED_STID, "CLOSED" }, \
- { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
- { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
{ NFS4_LAYOUT_STID, "LAYOUT" })

+#define show_stid_status(x) \
+ __print_flags(x, "|", \
+ { NFS4_STID_CLOSED, "CLOSED" }, \
+ { NFS4_STID_REVOKED, "REVOKED" }) \
+
DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_PROTO(
const struct nfs4_stid *stid
),
TP_ARGS(stid),
TP_STRUCT__entry(
- __field(unsigned long, sc_type)
+ __field(unsigned short, sc_type)
+ __field(unsigned short, sc_status)
__field(int, sc_count)
__field(u32, cl_boot)
__field(u32, cl_id)
@@ -676,16 +671,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
const stateid_t *stp = &stid->sc_stateid;

__entry->sc_type = stid->sc_type;
+ __entry->sc_status = stid->sc_status;
__entry->sc_count = refcount_read(&stid->sc_count);
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
__entry->cl_id = stp->si_opaque.so_clid.cl_id;
__entry->si_id = stp->si_opaque.so_id;
__entry->si_generation = stp->si_generation;
),
- TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
+ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
__entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation,
- __entry->sc_count, show_stid_type(__entry->sc_type)
+ __entry->sc_count, show_stid_type(__entry->sc_type),
+ show_stid_status(__entry->sc_status)
)
);

--
2.42.0

2023-11-03 14:34:45

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Fri, Nov 03, 2023 at 12:08:04PM +1100, NeilBrown wrote:
> On Fri, 03 Nov 2023, Chuck Lever III wrote:
> >
> > > On Nov 2, 2023, at 1:24 PM, Jeff Layton <[email protected]> wrote:
> > >
> > > On Fri, 2023-11-03 at 07:03 +1100, NeilBrown wrote:
> > >> On Thu, 02 Nov 2023, Jeff Layton wrote:
> > >>> On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> > >>>> The NFSv4 protocol allows state to be revoked by the admin and has error
> > >>>> codes which allow this to be communicated to the client.
> > >>>>
> > >>>> This patch
> > >>>> - introduces 3 new state-id types for revoked open, lock, and
> > >>>> delegation state. This requires the bitmask to be 'short',
> > >>>> not 'char'
> > >>>> - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> > >>>> - introduces a per-client counter of these states and returns
> > >>>> SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> > >>>> Decrement this when freeing any admin-revoked state.
> > >>>> - introduces stub code to find all interesting states for a given
> > >>>> superblock so they can be revoked via the 'unlock_filesystem'
> > >>>> file in /proc/fs/nfsd/
> > >>>> No actual states are handled yet.
> > >>>>
> > >>>> Signed-off-by: NeilBrown <[email protected]>
> > >>>> ---
> > >>>> fs/nfsd/nfs4layouts.c | 2 +-
> > >>>> fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> > >>>> fs/nfsd/nfsctl.c | 1 +
> > >>>> fs/nfsd/nfsd.h | 1 +
> > >>>> fs/nfsd/state.h | 35 +++++++++++-----
> > >>>> fs/nfsd/trace.h | 8 +++-
> > >>>> 6 files changed, 120 insertions(+), 20 deletions(-)
> > >>>>
> > >>>> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > >>>> index 5e8096bc5eaa..09d0363bfbc4 100644
> > >>>> --- a/fs/nfsd/nfs4layouts.c
> > >>>> +++ b/fs/nfsd/nfs4layouts.c
> > >>>> @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> > >>>> {
> > >>>> struct nfs4_layout_stateid *ls;
> > >>>> struct nfs4_stid *stid;
> > >>>> - unsigned char typemask = NFS4_LAYOUT_STID;
> > >>>> + unsigned short typemask = NFS4_LAYOUT_STID;
> > >>>> __be32 status;
> > >>>>
> > >>>> if (create)
> > >>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > >>>> index 65fd5510323a..f3ba53a16105 100644
> > >>>> --- a/fs/nfsd/nfs4state.c
> > >>>> +++ b/fs/nfsd/nfs4state.c
> > >>>> @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > >>>> return NULL;
> > >>>> }
> > >>>>
> > >>>> +void nfs4_unhash_stid(struct nfs4_stid *s)
> > >>>> +{
> > >>>> + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
> > >>>> + atomic_dec(&s->sc_client->cl_admin_revoked);
> > >>>> + s->sc_type = 0;
> > >>>> +}
> > >>>> +
> > >>>> void
> > >>>> nfs4_put_stid(struct nfs4_stid *s)
> > >>>> {
> > >>>> @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
> > >>>> return;
> > >>>> }
> > >>>> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > >>>> + nfs4_unhash_stid(s);
> > >>>> nfs4_free_cpntf_statelist(clp->net, s);
> > >>>> spin_unlock(&clp->cl_lock);
> > >>>> s->sc_free(s);
> > >>>> @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> > >>>> nfs4_put_stid(&dp->dl_stid);
> > >>>> }
> > >>>>
> > >>>> -void nfs4_unhash_stid(struct nfs4_stid *s)
> > >>>> -{
> > >>>> - s->sc_type = 0;
> > >>>> -}
> > >>>> -
> > >>>> /**
> > >>>> * nfs4_delegation_exists - Discover if this delegation already exists
> > >>>> * @clp: a pointer to the nfs4_client we're granting a delegation to
> > >>>> @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> > >>>> }
> > >>>>
> > >>>> idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > >>>> + nfs4_unhash_stid(s);
> > >>>> list_add(&stp->st_locks, reaplist);
> > >>>> }
> > >>>>
> > >>>> @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
> > >>>> nfs4_put_stateowner(&oo->oo_owner);
> > >>>> }
> > >>>>
> > >>>> +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> > >>>> + struct super_block *sb,
> > >>>> + unsigned short sc_types)
> > >>>> +{
> > >>>> + unsigned long id, tmp;
> > >>>> + struct nfs4_stid *stid;
> > >>>> +
> > >>>> + spin_lock(&clp->cl_lock);
> > >>>> + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> > >>>> + if ((stid->sc_type & sc_types) &&
> > >>>> + stid->sc_file->fi_inode->i_sb == sb) {
> > >>>> + refcount_inc(&stid->sc_count);
> > >>>> + break;
> > >>>> + }
> > >>>> + spin_unlock(&clp->cl_lock);
> > >>>> + return stid;
> > >>>> +}
> > >>>> +
> > >>>> +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > >>>> +{
> > >>>> + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > >>>> + unsigned int idhashval;
> > >>>> + unsigned short sc_types;
> > >>>> +
> > >>>> + sc_types = 0;
> > >>>> +
> > >>>> + spin_lock(&nn->client_lock);
> > >>>> + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> > >>>> + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> > >>>> + struct nfs4_client *clp;
> > >>>> + retry:
> > >>>> + list_for_each_entry(clp, head, cl_idhash) {
> > >>>> + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> > >>>> + sc_types);
> > >>>> + if (stid) {
> > >>>> + spin_unlock(&nn->client_lock);
> > >>>> + switch (stid->sc_type) {
> > >>>> + }
> > >>>> + nfs4_put_stid(stid);
> > >>>> + spin_lock(&nn->client_lock);
> > >>>> + goto retry;
> > >>>> + }
> > >>>> + }
> > >>>> + }
> > >>>> + spin_unlock(&nn->client_lock);
> > >>>> +}
> > >>>> +
> > >>>> static inline int
> > >>>> hash_sessionid(struct nfs4_sessionid *sessionid)
> > >>>> {
> > >>>> @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> > >>>> }
> > >>>>
> > >>>> static struct nfs4_stid *
> > >>>> -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> > >>>> +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> > >>>> + unsigned short typemask)
> > >>>> {
> > >>>> struct nfs4_stid *s;
> > >>>>
> > >>>> @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
> > >>>> }
> > >>>> seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> > >>>> seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> > >>>> + seq_printf(m, "admin-revoked states: %d\n",
> > >>>> + atomic_read(&clp->cl_admin_revoked));
> > >>>> drop_client(clp);
> > >>>>
> > >>>> return 0;
> > >>>> @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >>>> }
> > >>>> if (!list_empty(&clp->cl_revoked))
> > >>>> seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> > >>>> + if (atomic_read(&clp->cl_admin_revoked))
> > >>>> + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> > >>>> out_no_session:
> > >>>> if (conn)
> > >>>> free_conn(conn);
> > >>>> @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> > >>>> status = nfserr_deleg_revoked;
> > >>>> goto out;
> > >>>> }
> > >>>> + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
> > >>>> + nfs4_put_stid(&deleg->dl_stid);
> > >>>> + status = nfserr_admin_revoked;
> > >>>> + goto out;
> > >>>> + }
> > >>>> flags = share_access_to_flags(open->op_share_access);
> > >>>> status = nfs4_check_delegmode(deleg, flags);
> > >>>> if (status) {
> > >>>> @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> > >>>> case NFS4_REVOKED_DELEG_STID:
> > >>>> status = nfserr_deleg_revoked;
> > >>>> break;
> > >>>> + case NFS4_ADMIN_REVOKED_STID:
> > >>>> + case NFS4_ADMIN_REVOKED_LOCK_STID:
> > >>>> + case NFS4_ADMIN_REVOKED_DELEG_STID:
> > >>>> + status = nfserr_admin_revoked;
> > >>>> + break;
> > >>>> case NFS4_OPEN_STID:
> > >>>> case NFS4_LOCK_STID:
> > >>>> status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> > >>>> @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> > >>>>
> > >>>> __be32
> > >>>> nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > >>>> - stateid_t *stateid, unsigned char typemask,
> > >>>> + stateid_t *stateid, unsigned short typemask,
> > >>>> struct nfs4_stid **s, struct nfsd_net *nn)
> > >>>> {
> > >>>> __be32 status;
> > >>>> @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > >>>> else if (typemask & NFS4_DELEG_STID)
> > >>>> typemask |= NFS4_REVOKED_DELEG_STID;
> > >>>>
> > >>>> + if (typemask & NFS4_OPEN_STID)
> > >>>> + typemask |= NFS4_ADMIN_REVOKED_STID;
> > >>>> + if (typemask & NFS4_LOCK_STID)
> > >>>> + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
> > >>>> + if (typemask & NFS4_DELEG_STID)
> > >>>> + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
> > >>>> +
> > >>>> if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> > >>>> CLOSE_STATEID(stateid))
> > >>>> return nfserr_bad_stateid;
> > >>>> @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > >>>> return nfserr_deleg_revoked;
> > >>>> return nfserr_bad_stateid;
> > >>>> }
> > >>>> + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
> > >>>> + nfs4_put_stid(stid);
> > >>>> + return nfserr_admin_revoked;
> > >>>> + }
> > >>>> *s = stid;
> > >>>> return nfs_ok;
> > >>>> }
> > >>>> @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> > >>>> */
> > >>>> static __be32
> > >>>> nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> > >>>> - stateid_t *stateid, char typemask,
> > >>>> + stateid_t *stateid, unsigned short typemask,
> > >>>> struct nfs4_ol_stateid **stpp,
> > >>>> struct nfsd_net *nn)
> > >>>> {
> > >>>> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > >>>> index 739ed5bf71cd..50368eec86b0 100644
> > >>>> --- a/fs/nfsd/nfsctl.c
> > >>>> +++ b/fs/nfsd/nfsctl.c
> > >>>> @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> > >>>> * 3. Is that directory the root of an exported file system?
> > >>>> */
> > >>>> error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> > >>>> + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
> > >>>>
> > >>>> path_put(&path);
> > >>>> return error;
> > >>>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > >>>> index f5ff42f41ee7..d46203eac3c8 100644
> > >>>> --- a/fs/nfsd/nfsd.h
> > >>>> +++ b/fs/nfsd/nfsd.h
> > >>>> @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
> > >>>> #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
> > >>>> #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
> > >>>> #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
> > >>>> +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> > >>>> #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
> > >>>> #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
> > >>>> #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
> > >>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > >>>> index f96eaa8e9413..3af5ab55c978 100644
> > >>>> --- a/fs/nfsd/state.h
> > >>>> +++ b/fs/nfsd/state.h
> > >>>> @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> > >>>> */
> > >>>> struct nfs4_stid {
> > >>>> refcount_t sc_count;
> > >>>> -#define NFS4_OPEN_STID 1
> > >>>> -#define NFS4_LOCK_STID 2
> > >>>> -#define NFS4_DELEG_STID 4
> > >>>> + struct list_head sc_cp_list;
> > >>>> + unsigned short sc_type;
> > >>>
> > >>> Should we just go ahead and make this a full 32-bit word? We seem to
> > >>> keep adding flags to this field, and I doubt we're saving anything by
> > >>> making this a short.
> > >>>
> > >>>> +#define NFS4_OPEN_STID BIT(0)
> > >>>> +#define NFS4_LOCK_STID BIT(1)
> > >>>> +#define NFS4_DELEG_STID BIT(2)
> > >>>> /* For an open stateid kept around *only* to process close replays: */
> > >>>> -#define NFS4_CLOSED_STID 8
> > >>>> +#define NFS4_CLOSED_STID BIT(3)
> > >>>> /* For a deleg stateid kept around only to process free_stateid's: */
> > >>>> -#define NFS4_REVOKED_DELEG_STID 16
> > >>>> -#define NFS4_CLOSED_DELEG_STID 32
> > >>>> -#define NFS4_LAYOUT_STID 64
> > >>>> - struct list_head sc_cp_list;
> > >>>> - unsigned char sc_type;
> > >>>> +#define NFS4_REVOKED_DELEG_STID BIT(4)
> > >>>> +#define NFS4_CLOSED_DELEG_STID BIT(5)
> > >>>> +#define NFS4_LAYOUT_STID BIT(6)
> > >>>> +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> > >>>> +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> > >>>> +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> > >>>> +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> > >>>> + NFS4_ADMIN_REVOKED_LOCK_STID | \
> > >>>> + NFS4_ADMIN_REVOKED_DELEG_STID)
> > >>>
> > >>> Not a specific criticism of these patches, since this problem preexists
> > >>> them, but I really dislike the way that sc_type is used as a bitmask,
> > >>> but also sort of like an enum. In some cases, we test for specific flags
> > >>> in the mask, and in other cases (e.g. states_show), we treat them as
> > >>> discrete values to feed it to a switch().
> > >>>
> > >>> Personally, I'd find this less confusing if we just treat this as a set
> > >>> of flags full-stop. We could leave the low-order bits to show the real
> > >>> type (open, lock, deleg, etc.) and just mask off the high-order bits
> > >>> when we need to feed it to a switch statement.
> > >>>
> > >>> For instance above, we're adding 3 new NFS4_ADMIN_REVOKED values, but we
> > >>> could (in theory) just have a flag in there that says NFS4_ADMIN_REVOKED
> > >>> and leave the old type bit in place instead of changing it to a new
> > >>> discrete sc_type value.
> > >>
> > >> I agree.
> > >> Bits might be:
> > >> OPEN
> > >> LOCK
> > >> DELEG
> > >> LAYOUT
> > >> CLOSED (combines with OPEN or DELEG)
> > >> REVOKED (combines with DELEG)
> > >> ADMIN_REVOKED (combined with OPEN, LOCK, DELEG. Sets REVOKED when
> > >> with DELEG)
> > >>
> > >> so we could go back to a char :-) Probably sensible to use unsigned int
> > >> though.
> > >>
> > >
> > > Yeah, a u32 would be best I think. It'll just fill an existing hole on
> > > my box (x86_64), according to pahole:
> > >
> > > unsigned char sc_type; /* 24 1 */
> > >
> > > /* XXX 3 bytes hole, try to pack */
> > >
> > >> For updates the rule would be that bits can be set but never cleared so
> > >> you don't need locking to read unless you care about a bit that can be
> > >> changed.
> > >>
> > >
> > > Probably for the low order OPEN/LOCK/DELEG bits, the rule should be that
> > > they never change. We can never convert from one to another since they
> > > come out of different slabcaches. It's probably worthwhile to leave a
> > > few low order bits carved out for new types in the future too. e.g.
> > > directory delegations...
> > >
> > > Maybe we should rename the field too? How about "sc_mode" since this is
> > > starting to resemble the i_mode field in some ways (type and permissions
> > > squashed together).
> >
> > In that case, two separate u16 fields might be better.
> >
>
> Here is a first attempt. It compiles but I haven't tried running or
> thought about what testing would be appropriate.
> I'll be working on other things next week but I hope to pick this up
> again the following week and process any feedback, then see how my
> admin-revoke patch set fits on this new code.
>
> All comments most welcome.
>
> Thanks,
> NeilBrown
>
>
> From: NeilBrown <[email protected]>
> Date: Fri, 3 Nov 2023 11:43:55 +1100
> Subject: [PATCH] nfsd: tidy up sc_type
>
> sc_type identifies the type of a state - open, lock, deleg, layout - and
> also the status of a state - closed or revoked.
>
> This is a bit untidy and could get worse when "admin-revoked" states are
> added. So try to clean it up.
>
> The type is now all that is stored in sc_type. This is zero when the
> state is first added to the idr (causing it to be ignored), and if then
> set appropriately once it is fully initialised. It is set under
> ->cl_lock to ensure atomicity w.r.t lookup. It is (now) never cleared.
>
> This is still a bit-set even though at most one bit is set. This allows
> lookup functions to be given a bitmap of acceptable types.
>
> cl_type is now an unsigned short rather than char. There is no value in
> restricting to just 8 bits.
>
> The status is stored in a separate short named "cl_status". It has two
> flags: NFS4_STID_CLOSED and NFS4_STID_REVOKED.
> CLOSED combines NFS4_CLOSED_STID, NFS4_CLOSED_DELEG_STID, and is used
> for LOCK_STID instead of setting the sc_type to zero.
> These flags are only ever set, never cleared.
> For deleg stateids they are set under the global state_lock.
> For open and lock stateids they are set under ->cl_lock.
>
> Other changes here - some of which could be split out...
>
> - When a delegation is revoked, the type was previously set to
> NFS4_CLOSED_DELEG_STID and then NFS4_REVOKED_DELEG_STID.
> This might open a race window. That window no longer exists.
>
> - NFS4_STID_REVOKED (previously NFS4_REVOKED_DELEG_STID) is only set
> for ->minorversion>0, so I removed all the tests on minorversion when
> that status has been detected.
>
> - nfs4_unhash_stid() has been removed, and we never set sc_type = 0.
> This was only used for LOCK stids and they now use NFS4_STID_CLOSED
>
> - ->cl_lock is now hel when hash_delegation_locked() is called, so
> that the locking rules for setting ->sc_type are followed.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/nfsd/nfs4layouts.c | 4 +-
> fs/nfsd/nfs4state.c | 165 ++++++++++++++++++++----------------------
> fs/nfsd/state.h | 37 +++++++---
> fs/nfsd/trace.h | 25 +++----
> 4 files changed, 119 insertions(+), 112 deletions(-)
>
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index 5e8096bc5eaa..678bef3a7f15 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -269,13 +269,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> {
> struct nfs4_layout_stateid *ls;
> struct nfs4_stid *stid;
> - unsigned char typemask = NFS4_LAYOUT_STID;
> + unsigned short typemask = NFS4_LAYOUT_STID;
> __be32 status;
>
> if (create)
> typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
>
> - status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
> + status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
> net_generic(SVC_NET(rqstp), nfsd_net_id));
> if (status)
> goto out;
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 65fd5510323a..551a86a796ed 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1265,11 +1265,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> nfs4_put_stid(&dp->dl_stid);
> }
>
> -void nfs4_unhash_stid(struct nfs4_stid *s)
> -{
> - s->sc_type = 0;
> -}
> -
> /**
> * nfs4_delegation_exists - Discover if this delegation already exists
> * @clp: a pointer to the nfs4_client we're granting a delegation to
> @@ -1317,6 +1312,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
>
> lockdep_assert_held(&state_lock);
> lockdep_assert_held(&fp->fi_lock);
> + lockdep_assert_held(&clp->cl_lock);
>
> if (nfs4_delegation_exists(clp, fp))
> return -EAGAIN;
> @@ -1333,7 +1329,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp)
> }
>
> static bool
> -unhash_delegation_locked(struct nfs4_delegation *dp)
> +unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short state)
> {
> struct nfs4_file *fp = dp->dl_stid.sc_file;
>
> @@ -1342,7 +1338,10 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
> if (!delegation_hashed(dp))
> return false;
>
> - dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
> + if (dp->dl_stid.sc_client->cl_minorversion == 0)
> + state = NFS4_STID_CLOSED;
> + dp->dl_stid.sc_status |= state | NFS4_STID_CLOSED;
> +
> /* Ensure that deleg break won't try to requeue it */
> ++dp->dl_time;
> spin_lock(&fp->fi_lock);
> @@ -1358,7 +1357,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
> bool unhashed;
>
> spin_lock(&state_lock);
> - unhashed = unhash_delegation_locked(dp);
> + unhashed = unhash_delegation_locked(dp, NFS4_STID_CLOSED);
> spin_unlock(&state_lock);
> if (unhashed)
> destroy_unhashed_deleg(dp);
> @@ -1372,9 +1371,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
>
> trace_nfsd_stid_revoke(&dp->dl_stid);
>
> - if (clp->cl_minorversion) {
> + if (dp->dl_stid.sc_status & NFS4_STID_REVOKED) {
> spin_lock(&clp->cl_lock);
> - dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
> refcount_inc(&dp->dl_stid.sc_count);
> list_add(&dp->dl_recall_lru, &clp->cl_revoked);
> spin_unlock(&clp->cl_lock);
> @@ -1382,8 +1380,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
> destroy_unhashed_deleg(dp);
> }
>
> -/*
> - * SETCLIENTID state
> +/*
> + * SETCLIENTID state
> */
>
> static unsigned int clientid_hashval(u32 id)
> @@ -1546,7 +1544,6 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
> if (!unhash_ol_stateid(stp))
> return false;
> list_del_init(&stp->st_locks);
> - nfs4_unhash_stid(&stp->st_stid);
> return true;
> }
>
> @@ -1557,6 +1554,7 @@ static void release_lock_stateid(struct nfs4_ol_stateid *stp)
>
> spin_lock(&clp->cl_lock);
> unhashed = unhash_lock_stateid(stp);
> + stp->st_stid.sc_status |= NFS4_STID_CLOSED;
> spin_unlock(&clp->cl_lock);
> if (unhashed)
> nfs4_put_stid(&stp->st_stid);
> @@ -1625,6 +1623,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
> LIST_HEAD(reaplist);
>
> spin_lock(&stp->st_stid.sc_client->cl_lock);
> + stp->st_stid.sc_status |= NFS4_STID_CLOSED;
> if (unhash_open_stateid(stp, &reaplist))
> put_ol_stateid_locked(stp, &reaplist);
> spin_unlock(&stp->st_stid.sc_client->cl_lock);
> @@ -2233,7 +2232,7 @@ __destroy_client(struct nfs4_client *clp)
> spin_lock(&state_lock);
> while (!list_empty(&clp->cl_delegations)) {
> dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
> - WARN_ON(!unhash_delegation_locked(dp));
> + WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_CLOSED));
> list_add(&dp->dl_recall_lru, &reaplist);
> }
> spin_unlock(&state_lock);
> @@ -2465,14 +2464,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> }
>
> static struct nfs4_stid *
> -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> + unsigned short typemask, unsigned short ok_states)
> {
> struct nfs4_stid *s;
>
> spin_lock(&cl->cl_lock);
> s = find_stateid_locked(cl, t);
> if (s != NULL) {
> - if (typemask & s->sc_type)
> + if ((s->sc_status & ~ok_states) == 0 &&
> + (typemask & s->sc_type))
> refcount_inc(&s->sc_count);
> else
> s = NULL;
> @@ -4582,7 +4583,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> continue;
> if (local->st_stateowner != &oo->oo_owner)
> continue;
> - if (local->st_stid.sc_type == NFS4_OPEN_STID) {
> + if (local->st_stid.sc_type == NFS4_OPEN_STID &&
> + !(local->st_stid.sc_status & NFS4_STID_CLOSED)) {
> ret = local;
> refcount_inc(&ret->st_stid.sc_count);
> break;
> @@ -4596,17 +4598,10 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
> {
> __be32 ret = nfs_ok;
>
> - switch (s->sc_type) {
> - default:
> - break;
> - case 0:
> - case NFS4_CLOSED_STID:
> - case NFS4_CLOSED_DELEG_STID:
> - ret = nfserr_bad_stateid;
> - break;
> - case NFS4_REVOKED_DELEG_STID:
> + if (s->sc_status & NFS4_STID_REVOKED)
> ret = nfserr_deleg_revoked;
> - }
> + else if (s->sc_status & NFS4_STID_CLOSED)
> + ret = nfserr_bad_stateid;
> return ret;
> }
>
> @@ -4919,9 +4914,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
>
> trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
>
> - if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
> - dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
> - return 1;
> + if (dp->dl_stid.sc_status)
> + /* CLOSED or REVOKED */
> + return 1;
>
> switch (task->tk_status) {
> case 0:
> @@ -5170,8 +5165,7 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
> {
> struct nfs4_stid *ret;
>
> - ret = find_stateid_by_type(cl, s,
> - NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
> + ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID, 0);
> if (!ret)
> return NULL;
> return delegstateid(ret);
> @@ -5194,10 +5188,9 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
> if (deleg == NULL)
> goto out;
> - if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
> + if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
> nfs4_put_stid(&deleg->dl_stid);
> - if (cl->cl_minorversion)
> - status = nfserr_deleg_revoked;
> + status = nfserr_deleg_revoked;
> goto out;
> }
> flags = share_access_to_flags(open->op_share_access);
> @@ -5609,12 +5602,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> goto out_unlock;
>
> spin_lock(&state_lock);
> + spin_lock(&clp->cl_lock);
> spin_lock(&fp->fi_lock);
> if (fp->fi_had_conflict)
> status = -EAGAIN;
> else
> status = hash_delegation_locked(dp, fp);
> spin_unlock(&fp->fi_lock);
> + spin_unlock(&clp->cl_lock);
> spin_unlock(&state_lock);
>
> if (status)
> @@ -5840,7 +5835,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> } else {
> status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
> if (status) {
> - stp->st_stid.sc_type = NFS4_CLOSED_STID;
> release_open_stateid(stp);
> mutex_unlock(&stp->st_mutex);
> goto out;
> @@ -6232,7 +6226,7 @@ nfs4_laundromat(struct nfsd_net *nn)
> dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
> if (!state_expired(&lt, dp->dl_time))
> break;
> - WARN_ON(!unhash_delegation_locked(dp));
> + WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_REVOKED));
> list_add(&dp->dl_recall_lru, &reaplist);
> }
> spin_unlock(&state_lock);
> @@ -6471,22 +6465,20 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
> if (status)
> goto out_unlock;
> + status = nfsd4_verify_open_stid(s);
> + if (status)
> + goto out_unlock;
> +
> switch (s->sc_type) {
> case NFS4_DELEG_STID:
> status = nfs_ok;
> break;
> - case NFS4_REVOKED_DELEG_STID:
> - status = nfserr_deleg_revoked;
> - break;
> case NFS4_OPEN_STID:
> case NFS4_LOCK_STID:
> status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> break;
> default:
> printk("unknown stateid type %x\n", s->sc_type);
> - fallthrough;
> - case NFS4_CLOSED_STID:
> - case NFS4_CLOSED_DELEG_STID:
> status = nfserr_bad_stateid;
> }
> out_unlock:
> @@ -6496,7 +6488,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>
> __be32
> nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> + stateid_t *stateid,
> + unsigned short typemask, unsigned short statemask,
> struct nfs4_stid **s, struct nfsd_net *nn)
> {
> __be32 status;
> @@ -6507,10 +6500,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> * only return revoked delegations if explicitly asked.
> * otherwise we report revoked or bad_stateid status.
> */
> - if (typemask & NFS4_REVOKED_DELEG_STID)
> + if (statemask & NFS4_STID_REVOKED)
> return_revoked = true;
> - else if (typemask & NFS4_DELEG_STID)
> - typemask |= NFS4_REVOKED_DELEG_STID;
>
> if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> CLOSE_STATEID(stateid))
> @@ -6523,14 +6514,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> }
> if (status)
> return status;
> - stid = find_stateid_by_type(cstate->clp, stateid, typemask);
> + stid = find_stateid_by_type(cstate->clp, stateid,
> + typemask, statemask & NFS4_STID_CLOSED);
> if (!stid)
> return nfserr_bad_stateid;
> - if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
> + if ((stid->sc_status & NFS4_STID_REVOKED) && !return_revoked) {
> nfs4_put_stid(stid);
> - if (cstate->minorversion)
> - return nfserr_deleg_revoked;
> - return nfserr_bad_stateid;
> + return nfserr_deleg_revoked;
> }
> *s = stid;
> return nfs_ok;
> @@ -6541,7 +6531,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
> {
> struct nfsd_file *ret = NULL;
>
> - if (!s)
> + if (!s || s->sc_status)
> return NULL;
>
> switch (s->sc_type) {
> @@ -6664,7 +6654,8 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
> goto out;
>
> *stid = find_stateid_by_type(found, &cps->cp_p_stateid,
> - NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID);
> + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
> + 0);
> if (*stid)
> status = nfs_ok;
> else
> @@ -6722,7 +6713,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
>
> status = nfsd4_lookup_stateid(cstate, stateid,
> NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
> - &s, nn);
> + 0, &s, nn);
> if (status == nfserr_bad_stateid)
> status = find_cpntf_state(nn, stateid, &s);
> if (status)
> @@ -6740,9 +6731,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> case NFS4_LOCK_STID:
> status = nfs4_check_olstateid(openlockstateid(s), flags);
> break;
> - default:
> - status = nfserr_bad_stateid;
> - break;
> }
> if (status)
> goto out;
> @@ -6821,11 +6809,20 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>
> spin_lock(&cl->cl_lock);
> s = find_stateid_locked(cl, stateid);
> - if (!s)
> + if (!s || s->sc_status & NFS4_STID_CLOSED)
> goto out_unlock;
> spin_lock(&s->sc_lock);
> switch (s->sc_type) {
> case NFS4_DELEG_STID:
> + if (s->sc_status & NFS4_STID_REVOKED) {
> + spin_unlock(&s->sc_lock);
> + dp = delegstateid(s);
> + list_del_init(&dp->dl_recall_lru);
> + spin_unlock(&cl->cl_lock);
> + nfs4_put_stid(s);
> + ret = nfs_ok;
> + goto out;
> + }
> ret = nfserr_locks_held;
> break;
> case NFS4_OPEN_STID:
> @@ -6840,14 +6837,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> spin_unlock(&cl->cl_lock);
> ret = nfsd4_free_lock_stateid(stateid, s);
> goto out;
> - case NFS4_REVOKED_DELEG_STID:
> - spin_unlock(&s->sc_lock);
> - dp = delegstateid(s);
> - list_del_init(&dp->dl_recall_lru);
> - spin_unlock(&cl->cl_lock);
> - nfs4_put_stid(s);
> - ret = nfs_ok;
> - goto out;
> /* Default falls through and returns nfserr_bad_stateid */
> }
> spin_unlock(&s->sc_lock);
> @@ -6890,6 +6879,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> * @seqid: seqid (provided by client)
> * @stateid: stateid (provided by client)
> * @typemask: mask of allowable types for this operation
> + * @statemask: mask of allowed states: 0 or STID_CLOSED
> * @stpp: return pointer for the stateid found
> * @nn: net namespace for request
> *
> @@ -6899,7 +6889,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> */
> static __be32
> nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> - stateid_t *stateid, char typemask,
> + stateid_t *stateid,
> + unsigned short typemask, unsigned short statemask,
> struct nfs4_ol_stateid **stpp,
> struct nfsd_net *nn)
> {
> @@ -6910,7 +6901,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> trace_nfsd_preprocess(seqid, stateid);
>
> *stpp = NULL;
> - status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
> + status = nfsd4_lookup_stateid(cstate, stateid,
> + typemask, statemask, &s, nn);
> if (status)
> return status;
> stp = openlockstateid(s);
> @@ -6932,7 +6924,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
> struct nfs4_ol_stateid *stp;
>
> status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
> - NFS4_OPEN_STID, &stp, nn);
> + NFS4_OPEN_STID, 0, &stp, nn);
> if (status)
> return status;
> oo = openowner(stp->st_stateowner);
> @@ -6963,8 +6955,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> return status;
>
> status = nfs4_preprocess_seqid_op(cstate,
> - oc->oc_seqid, &oc->oc_req_stateid,
> - NFS4_OPEN_STID, &stp, nn);
> + oc->oc_seqid, &oc->oc_req_stateid,
> + NFS4_OPEN_STID, 0, &stp, nn);
> if (status)
> goto out;
> oo = openowner(stp->st_stateowner);
> @@ -7094,18 +7086,20 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> struct net *net = SVC_NET(rqstp);
> struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>
> - dprintk("NFSD: nfsd4_close on file %pd\n",
> + dprintk("NFSD: nfsd4_close on file %pd\n",
> cstate->current_fh.fh_dentry);
>
> status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
> - &close->cl_stateid,
> - NFS4_OPEN_STID|NFS4_CLOSED_STID,
> - &stp, nn);
> + &close->cl_stateid,
> + NFS4_OPEN_STID, NFS4_STID_CLOSED,
> + &stp, nn);
> nfsd4_bump_seqid(cstate, status);
> if (status)
> - goto out;
> + goto out;
>
> - stp->st_stid.sc_type = NFS4_CLOSED_STID;
> + spin_lock(&stp->st_stid.sc_client->cl_lock);
> + stp->st_stid.sc_status |= NFS4_STID_CLOSED;
> + spin_unlock(&stp->st_stid.sc_client->cl_lock);
>
> /*
> * Technically we don't _really_ have to increment or copy it, since
> @@ -7147,7 +7141,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
> return status;
>
> - status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
> + status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, 0, &s, nn);
> if (status)
> goto out;
> dp = delegstateid(s);
> @@ -7601,9 +7595,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> &lock_stp, &new);
> } else {
> status = nfs4_preprocess_seqid_op(cstate,
> - lock->lk_old_lock_seqid,
> - &lock->lk_old_lock_stateid,
> - NFS4_LOCK_STID, &lock_stp, nn);
> + lock->lk_old_lock_seqid,
> + &lock->lk_old_lock_stateid,
> + NFS4_LOCK_STID, 0, &lock_stp,
> + nn);
> }
> if (status)
> goto out;
> @@ -7916,8 +7911,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> return nfserr_inval;
>
> status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
> - &locku->lu_stateid, NFS4_LOCK_STID,
> - &stp, nn);
> + &locku->lu_stateid, NFS4_LOCK_STID, 0,
> + &stp, nn);
> if (status)
> goto out;
> nf = find_any_file(stp->st_stid.sc_file);
> @@ -8347,7 +8342,7 @@ nfs4_state_shutdown_net(struct net *net)
> spin_lock(&state_lock);
> list_for_each_safe(pos, next, &nn->del_recall_lru) {
> dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
> - WARN_ON(!unhash_delegation_locked(dp));
> + WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_CLOSED));
> list_add(&dp->dl_recall_lru, &reaplist);
> }
> spin_unlock(&state_lock);
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index f96eaa8e9413..cf89fb6be9e1 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -88,17 +88,31 @@ struct nfsd4_callback_ops {
> */
> struct nfs4_stid {
> refcount_t sc_count;
> -#define NFS4_OPEN_STID 1
> -#define NFS4_LOCK_STID 2
> -#define NFS4_DELEG_STID 4
> -/* For an open stateid kept around *only* to process close replays: */
> -#define NFS4_CLOSED_STID 8
> +
> + /* A new stateid is added to the idr early before it is
> + * fully initialised. Its sc_type is then zero.
> + * After initialisation the sc_type it set under cl_lock,
> + * and then never changes.
> + */
> +#define NFS4_OPEN_STID BIT(0)
> +#define NFS4_LOCK_STID BIT(1)
> +#define NFS4_DELEG_STID BIT(2)
> +#define NFS4_LAYOUT_STID BIT(3)
> + unsigned short sc_type;
> +/* state_lock protects sc_status for delegation stateids.
> + * ->cl_lock protects sc_status for open and lock stateids.
> + * ->st_mutex also protect sc_status for open stateids.
> + */
> +/*
> + * For an open stateid kept around *only* to process close replays.
> + * For deleg stateid, kept in idr until last reference is dropped.
> + */
> +#define NFS4_STID_CLOSED BIT(0)
> /* For a deleg stateid kept around only to process free_stateid's: */
> -#define NFS4_REVOKED_DELEG_STID 16
> -#define NFS4_CLOSED_DELEG_STID 32
> -#define NFS4_LAYOUT_STID 64
> +#define NFS4_STID_REVOKED BIT(1)
> + unsigned short sc_status;
> +
> struct list_head sc_cp_list;
> - unsigned char sc_type;
> stateid_t sc_stateid;
> spinlock_t sc_lock;
> struct nfs4_client *sc_client;
> @@ -694,8 +708,9 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> stateid_t *stateid, int flags, struct nfsd_file **filp,
> struct nfs4_stid **cstid);
> __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> - struct nfs4_stid **s, struct nfsd_net *nn);
> + stateid_t *stateid, unsigned short typemask,
> + unsigned short statemask,
> + struct nfs4_stid **s, struct nfsd_net *nn);
> struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
> void (*sc_free)(struct nfs4_stid *));
> int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index fbc0ccb40424..668b352faaea 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -641,31 +641,26 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
> DEFINE_STATESEQID_EVENT(preprocess);
> DEFINE_STATESEQID_EVENT(open_confirm);
>
> -TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
> -TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
> -TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
> -TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> -TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> -TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> -TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> -
> #define show_stid_type(x) \
> __print_flags(x, "|", \
> { NFS4_OPEN_STID, "OPEN" }, \
> { NFS4_LOCK_STID, "LOCK" }, \
> { NFS4_DELEG_STID, "DELEG" }, \
> - { NFS4_CLOSED_STID, "CLOSED" }, \
> - { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> - { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> { NFS4_LAYOUT_STID, "LAYOUT" })
>
> +#define show_stid_status(x) \
> + __print_flags(x, "|", \
> + { NFS4_STID_CLOSED, "CLOSED" }, \
> + { NFS4_STID_REVOKED, "REVOKED" }) \
> +
> DECLARE_EVENT_CLASS(nfsd_stid_class,
> TP_PROTO(
> const struct nfs4_stid *stid
> ),
> TP_ARGS(stid),
> TP_STRUCT__entry(
> - __field(unsigned long, sc_type)
> + __field(unsigned short, sc_type)
> + __field(unsigned short, sc_status)

I'll let Jeff comment on the other areas. Here, I prefer that
anything that is passed to __print_flags or __print_symbolic be
stored in an unsigned long, since that's the type of the @val
argument to trace_print_symbols_seq(). That's why the sc_type
entry is already an unsigned long.

That way any type conversion that might be necessary is visible
(and controllable) in the TP_fast_assign section of the trace
point.

Note that these days, trace events are stored in the ring buffer
in a compressed form, so the actual size of record entries is
typically not critical.

(This is a nit/general comment -- I can fix this up when I apply
it if Jeff is happy with the rest of the patch).


> __field(int, sc_count)
> __field(u32, cl_boot)
> __field(u32, cl_id)
> @@ -676,16 +671,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
> const stateid_t *stp = &stid->sc_stateid;
>
> __entry->sc_type = stid->sc_type;
> + __entry->sc_status = stid->sc_status;
> __entry->sc_count = refcount_read(&stid->sc_count);
> __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
> __entry->cl_id = stp->si_opaque.so_clid.cl_id;
> __entry->si_id = stp->si_opaque.so_id;
> __entry->si_generation = stp->si_generation;
> ),
> - TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
> + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
> __entry->cl_boot, __entry->cl_id,
> __entry->si_id, __entry->si_generation,
> - __entry->sc_count, show_stid_type(__entry->sc_type)
> + __entry->sc_count, show_stid_type(__entry->sc_type),
> + show_stid_status(__entry->sc_status)
> )
> );
>
> --
> 2.42.0
>

--
Chuck Lever

2023-11-03 17:25:44

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH 1/6] nfsd: prepare for supporting admin-revocation of state

On Fri, 2023-11-03 at 12:08 +1100, NeilBrown wrote:
> On Fri, 03 Nov 2023, Chuck Lever III wrote:
> >
> > > On Nov 2, 2023, at 1:24 PM, Jeff Layton <[email protected]> wrote:
> > >
> > > On Fri, 2023-11-03 at 07:03 +1100, NeilBrown wrote:
> > > > On Thu, 02 Nov 2023, Jeff Layton wrote:
> > > > > On Wed, 2023-11-01 at 11:57 +1100, NeilBrown wrote:
> > > > > > The NFSv4 protocol allows state to be revoked by the admin and has error
> > > > > > codes which allow this to be communicated to the client.
> > > > > >
> > > > > > This patch
> > > > > > - introduces 3 new state-id types for revoked open, lock, and
> > > > > > delegation state. This requires the bitmask to be 'short',
> > > > > > not 'char'
> > > > > > - reports NFS4ERR_ADMIN_REVOKED when these are accessed
> > > > > > - introduces a per-client counter of these states and returns
> > > > > > SEQ4_STATUS_ADMIN_STATE_REVOKED when the counter is not zero.
> > > > > > Decrement this when freeing any admin-revoked state.
> > > > > > - introduces stub code to find all interesting states for a given
> > > > > > superblock so they can be revoked via the 'unlock_filesystem'
> > > > > > file in /proc/fs/nfsd/
> > > > > > No actual states are handled yet.
> > > > > >
> > > > > > Signed-off-by: NeilBrown <[email protected]>
> > > > > > ---
> > > > > > fs/nfsd/nfs4layouts.c | 2 +-
> > > > > > fs/nfsd/nfs4state.c | 93 +++++++++++++++++++++++++++++++++++++++----
> > > > > > fs/nfsd/nfsctl.c | 1 +
> > > > > > fs/nfsd/nfsd.h | 1 +
> > > > > > fs/nfsd/state.h | 35 +++++++++++-----
> > > > > > fs/nfsd/trace.h | 8 +++-
> > > > > > 6 files changed, 120 insertions(+), 20 deletions(-)
> > > > > >
> > > > > > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> > > > > > index 5e8096bc5eaa..09d0363bfbc4 100644
> > > > > > --- a/fs/nfsd/nfs4layouts.c
> > > > > > +++ b/fs/nfsd/nfs4layouts.c
> > > > > > @@ -269,7 +269,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> > > > > > {
> > > > > > struct nfs4_layout_stateid *ls;
> > > > > > struct nfs4_stid *stid;
> > > > > > - unsigned char typemask = NFS4_LAYOUT_STID;
> > > > > > + unsigned short typemask = NFS4_LAYOUT_STID;
> > > > > > __be32 status;
> > > > > >
> > > > > > if (create)
> > > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > > > index 65fd5510323a..f3ba53a16105 100644
> > > > > > --- a/fs/nfsd/nfs4state.c
> > > > > > +++ b/fs/nfsd/nfs4state.c
> > > > > > @@ -1202,6 +1202,13 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
> > > > > > return NULL;
> > > > > > }
> > > > > >
> > > > > > +void nfs4_unhash_stid(struct nfs4_stid *s)
> > > > > > +{
> > > > > > + if (s->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS)
> > > > > > + atomic_dec(&s->sc_client->cl_admin_revoked);
> > > > > > + s->sc_type = 0;
> > > > > > +}
> > > > > > +
> > > > > > void
> > > > > > nfs4_put_stid(struct nfs4_stid *s)
> > > > > > {
> > > > > > @@ -1215,6 +1222,7 @@ nfs4_put_stid(struct nfs4_stid *s)
> > > > > > return;
> > > > > > }
> > > > > > idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > > > > > + nfs4_unhash_stid(s);
> > > > > > nfs4_free_cpntf_statelist(clp->net, s);
> > > > > > spin_unlock(&clp->cl_lock);
> > > > > > s->sc_free(s);
> > > > > > @@ -1265,11 +1273,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> > > > > > nfs4_put_stid(&dp->dl_stid);
> > > > > > }
> > > > > >
> > > > > > -void nfs4_unhash_stid(struct nfs4_stid *s)
> > > > > > -{
> > > > > > - s->sc_type = 0;
> > > > > > -}
> > > > > > -
> > > > > > /**
> > > > > > * nfs4_delegation_exists - Discover if this delegation already exists
> > > > > > * @clp: a pointer to the nfs4_client we're granting a delegation to
> > > > > > @@ -1536,6 +1539,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
> > > > > > }
> > > > > >
> > > > > > idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> > > > > > + nfs4_unhash_stid(s);
> > > > > > list_add(&stp->st_locks, reaplist);
> > > > > > }
> > > > > >
> > > > > > @@ -1680,6 +1684,53 @@ static void release_openowner(struct nfs4_openowner *oo)
> > > > > > nfs4_put_stateowner(&oo->oo_owner);
> > > > > > }
> > > > > >
> > > > > > +static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
> > > > > > + struct super_block *sb,
> > > > > > + unsigned short sc_types)
> > > > > > +{
> > > > > > + unsigned long id, tmp;
> > > > > > + struct nfs4_stid *stid;
> > > > > > +
> > > > > > + spin_lock(&clp->cl_lock);
> > > > > > + idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
> > > > > > + if ((stid->sc_type & sc_types) &&
> > > > > > + stid->sc_file->fi_inode->i_sb == sb) {
> > > > > > + refcount_inc(&stid->sc_count);
> > > > > > + break;
> > > > > > + }
> > > > > > + spin_unlock(&clp->cl_lock);
> > > > > > + return stid;
> > > > > > +}
> > > > > > +
> > > > > > +void nfsd4_revoke_states(struct net *net, struct super_block *sb)
> > > > > > +{
> > > > > > + struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> > > > > > + unsigned int idhashval;
> > > > > > + unsigned short sc_types;
> > > > > > +
> > > > > > + sc_types = 0;
> > > > > > +
> > > > > > + spin_lock(&nn->client_lock);
> > > > > > + for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
> > > > > > + struct list_head *head = &nn->conf_id_hashtbl[idhashval];
> > > > > > + struct nfs4_client *clp;
> > > > > > + retry:
> > > > > > + list_for_each_entry(clp, head, cl_idhash) {
> > > > > > + struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
> > > > > > + sc_types);
> > > > > > + if (stid) {
> > > > > > + spin_unlock(&nn->client_lock);
> > > > > > + switch (stid->sc_type) {
> > > > > > + }
> > > > > > + nfs4_put_stid(stid);
> > > > > > + spin_lock(&nn->client_lock);
> > > > > > + goto retry;
> > > > > > + }
> > > > > > + }
> > > > > > + }
> > > > > > + spin_unlock(&nn->client_lock);
> > > > > > +}
> > > > > > +
> > > > > > static inline int
> > > > > > hash_sessionid(struct nfs4_sessionid *sessionid)
> > > > > > {
> > > > > > @@ -2465,7 +2516,8 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> > > > > > }
> > > > > >
> > > > > > static struct nfs4_stid *
> > > > > > -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> > > > > > +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> > > > > > + unsigned short typemask)
> > > > > > {
> > > > > > struct nfs4_stid *s;
> > > > > >
> > > > > > @@ -2549,6 +2601,8 @@ static int client_info_show(struct seq_file *m, void *v)
> > > > > > }
> > > > > > seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
> > > > > > seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
> > > > > > + seq_printf(m, "admin-revoked states: %d\n",
> > > > > > + atomic_read(&clp->cl_admin_revoked));
> > > > > > drop_client(clp);
> > > > > >
> > > > > > return 0;
> > > > > > @@ -4108,6 +4162,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > > > > > }
> > > > > > if (!list_empty(&clp->cl_revoked))
> > > > > > seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
> > > > > > + if (atomic_read(&clp->cl_admin_revoked))
> > > > > > + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
> > > > > > out_no_session:
> > > > > > if (conn)
> > > > > > free_conn(conn);
> > > > > > @@ -5200,6 +5256,11 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> > > > > > status = nfserr_deleg_revoked;
> > > > > > goto out;
> > > > > > }
> > > > > > + if (deleg->dl_stid.sc_type == NFS4_ADMIN_REVOKED_DELEG_STID) {
> > > > > > + nfs4_put_stid(&deleg->dl_stid);
> > > > > > + status = nfserr_admin_revoked;
> > > > > > + goto out;
> > > > > > + }
> > > > > > flags = share_access_to_flags(open->op_share_access);
> > > > > > status = nfs4_check_delegmode(deleg, flags);
> > > > > > if (status) {
> > > > > > @@ -6478,6 +6539,11 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> > > > > > case NFS4_REVOKED_DELEG_STID:
> > > > > > status = nfserr_deleg_revoked;
> > > > > > break;
> > > > > > + case NFS4_ADMIN_REVOKED_STID:
> > > > > > + case NFS4_ADMIN_REVOKED_LOCK_STID:
> > > > > > + case NFS4_ADMIN_REVOKED_DELEG_STID:
> > > > > > + status = nfserr_admin_revoked;
> > > > > > + break;
> > > > > > case NFS4_OPEN_STID:
> > > > > > case NFS4_LOCK_STID:
> > > > > > status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> > > > > > @@ -6496,7 +6562,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> > > > > >
> > > > > > __be32
> > > > > > nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > > > > - stateid_t *stateid, unsigned char typemask,
> > > > > > + stateid_t *stateid, unsigned short typemask,
> > > > > > struct nfs4_stid **s, struct nfsd_net *nn)
> > > > > > {
> > > > > > __be32 status;
> > > > > > @@ -6512,6 +6578,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > > > > else if (typemask & NFS4_DELEG_STID)
> > > > > > typemask |= NFS4_REVOKED_DELEG_STID;
> > > > > >
> > > > > > + if (typemask & NFS4_OPEN_STID)
> > > > > > + typemask |= NFS4_ADMIN_REVOKED_STID;
> > > > > > + if (typemask & NFS4_LOCK_STID)
> > > > > > + typemask |= NFS4_ADMIN_REVOKED_LOCK_STID;
> > > > > > + if (typemask & NFS4_DELEG_STID)
> > > > > > + typemask |= NFS4_ADMIN_REVOKED_DELEG_STID;
> > > > > > +
> > > > > > if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> > > > > > CLOSE_STATEID(stateid))
> > > > > > return nfserr_bad_stateid;
> > > > > > @@ -6532,6 +6605,10 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> > > > > > return nfserr_deleg_revoked;
> > > > > > return nfserr_bad_stateid;
> > > > > > }
> > > > > > + if (stid->sc_type & NFS4_ALL_ADMIN_REVOKED_STIDS) {
> > > > > > + nfs4_put_stid(stid);
> > > > > > + return nfserr_admin_revoked;
> > > > > > + }
> > > > > > *s = stid;
> > > > > > return nfs_ok;
> > > > > > }
> > > > > > @@ -6899,7 +6976,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> > > > > > */
> > > > > > static __be32
> > > > > > nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> > > > > > - stateid_t *stateid, char typemask,
> > > > > > + stateid_t *stateid, unsigned short typemask,
> > > > > > struct nfs4_ol_stateid **stpp,
> > > > > > struct nfsd_net *nn)
> > > > > > {
> > > > > > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > > > > > index 739ed5bf71cd..50368eec86b0 100644
> > > > > > --- a/fs/nfsd/nfsctl.c
> > > > > > +++ b/fs/nfsd/nfsctl.c
> > > > > > @@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
> > > > > > * 3. Is that directory the root of an exported file system?
> > > > > > */
> > > > > > error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
> > > > > > + nfsd4_revoke_states(netns(file), path.dentry->d_sb);
> > > > > >
> > > > > > path_put(&path);
> > > > > > return error;
> > > > > > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > > > > > index f5ff42f41ee7..d46203eac3c8 100644
> > > > > > --- a/fs/nfsd/nfsd.h
> > > > > > +++ b/fs/nfsd/nfsd.h
> > > > > > @@ -280,6 +280,7 @@ void nfsd_lockd_shutdown(void);
> > > > > > #define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
> > > > > > #define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
> > > > > > #define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
> > > > > > +#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
> > > > > > #define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
> > > > > > #define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
> > > > > > #define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
> > > > > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > > > > > index f96eaa8e9413..3af5ab55c978 100644
> > > > > > --- a/fs/nfsd/state.h
> > > > > > +++ b/fs/nfsd/state.h
> > > > > > @@ -88,17 +88,23 @@ struct nfsd4_callback_ops {
> > > > > > */
> > > > > > struct nfs4_stid {
> > > > > > refcount_t sc_count;
> > > > > > -#define NFS4_OPEN_STID 1
> > > > > > -#define NFS4_LOCK_STID 2
> > > > > > -#define NFS4_DELEG_STID 4
> > > > > > + struct list_head sc_cp_list;
> > > > > > + unsigned short sc_type;
> > > > >
> > > > > Should we just go ahead and make this a full 32-bit word? We seem to
> > > > > keep adding flags to this field, and I doubt we're saving anything by
> > > > > making this a short.
> > > > >
> > > > > > +#define NFS4_OPEN_STID BIT(0)
> > > > > > +#define NFS4_LOCK_STID BIT(1)
> > > > > > +#define NFS4_DELEG_STID BIT(2)
> > > > > > /* For an open stateid kept around *only* to process close replays: */
> > > > > > -#define NFS4_CLOSED_STID 8
> > > > > > +#define NFS4_CLOSED_STID BIT(3)
> > > > > > /* For a deleg stateid kept around only to process free_stateid's: */
> > > > > > -#define NFS4_REVOKED_DELEG_STID 16
> > > > > > -#define NFS4_CLOSED_DELEG_STID 32
> > > > > > -#define NFS4_LAYOUT_STID 64
> > > > > > - struct list_head sc_cp_list;
> > > > > > - unsigned char sc_type;
> > > > > > +#define NFS4_REVOKED_DELEG_STID BIT(4)
> > > > > > +#define NFS4_CLOSED_DELEG_STID BIT(5)
> > > > > > +#define NFS4_LAYOUT_STID BIT(6)
> > > > > > +#define NFS4_ADMIN_REVOKED_STID BIT(7)
> > > > > > +#define NFS4_ADMIN_REVOKED_LOCK_STID BIT(8)
> > > > > > +#define NFS4_ADMIN_REVOKED_DELEG_STID BIT(9)
> > > > > > +#define NFS4_ALL_ADMIN_REVOKED_STIDS (NFS4_ADMIN_REVOKED_STID | \
> > > > > > + NFS4_ADMIN_REVOKED_LOCK_STID | \
> > > > > > + NFS4_ADMIN_REVOKED_DELEG_STID)
> > > > >
> > > > > Not a specific criticism of these patches, since this problem preexists
> > > > > them, but I really dislike the way that sc_type is used as a bitmask,
> > > > > but also sort of like an enum. In some cases, we test for specific flags
> > > > > in the mask, and in other cases (e.g. states_show), we treat them as
> > > > > discrete values to feed it to a switch().
> > > > >
> > > > > Personally, I'd find this less confusing if we just treat this as a set
> > > > > of flags full-stop. We could leave the low-order bits to show the real
> > > > > type (open, lock, deleg, etc.) and just mask off the high-order bits
> > > > > when we need to feed it to a switch statement.
> > > > >
> > > > > For instance above, we're adding 3 new NFS4_ADMIN_REVOKED values, but we
> > > > > could (in theory) just have a flag in there that says NFS4_ADMIN_REVOKED
> > > > > and leave the old type bit in place instead of changing it to a new
> > > > > discrete sc_type value.
> > > >
> > > > I agree.
> > > > Bits might be:
> > > > OPEN
> > > > LOCK
> > > > DELEG
> > > > LAYOUT
> > > > CLOSED (combines with OPEN or DELEG)
> > > > REVOKED (combines with DELEG)
> > > > ADMIN_REVOKED (combined with OPEN, LOCK, DELEG. Sets REVOKED when
> > > > with DELEG)
> > > >
> > > > so we could go back to a char :-) Probably sensible to use unsigned int
> > > > though.
> > > >
> > >
> > > Yeah, a u32 would be best I think. It'll just fill an existing hole on
> > > my box (x86_64), according to pahole:
> > >
> > > unsigned char sc_type; /* 24 1 */
> > >
> > > /* XXX 3 bytes hole, try to pack */
> > >
> > > > For updates the rule would be that bits can be set but never cleared so
> > > > you don't need locking to read unless you care about a bit that can be
> > > > changed.
> > > >
> > >
> > > Probably for the low order OPEN/LOCK/DELEG bits, the rule should be that
> > > they never change. We can never convert from one to another since they
> > > come out of different slabcaches. It's probably worthwhile to leave a
> > > few low order bits carved out for new types in the future too. e.g.
> > > directory delegations...
> > >
> > > Maybe we should rename the field too? How about "sc_mode" since this is
> > > starting to resemble the i_mode field in some ways (type and permissions
> > > squashed together).
> >
> > In that case, two separate u16 fields might be better.
> >
>
> Here is a first attempt. It compiles but I haven't tried running or
> thought about what testing would be appropriate.
> I'll be working on other things next week but I hope to pick this up
> again the following week and process any feedback, then see how my
> admin-revoke patch set fits on this new code.
>
> All comments most welcome.
>
> Thanks,
> NeilBrown
>
>
> From: NeilBrown <[email protected]>
> Date: Fri, 3 Nov 2023 11:43:55 +1100
> Subject: [PATCH] nfsd: tidy up sc_type
>
> sc_type identifies the type of a state - open, lock, deleg, layout - and
> also the status of a state - closed or revoked.
>
> This is a bit untidy and could get worse when "admin-revoked" states are
> added. So try to clean it up.
>
> The type is now all that is stored in sc_type. This is zero when the
> state is first added to the idr (causing it to be ignored), and if then
> set appropriately once it is fully initialised. It is set under
> ->cl_lock to ensure atomicity w.r.t lookup. It is (now) never cleared.
>
> This is still a bit-set even though at most one bit is set. This allows
> lookup functions to be given a bitmap of acceptable types.
>
> cl_type is now an unsigned short rather than char. There is no value in
> restricting to just 8 bits.
>
> The status is stored in a separate short named "cl_status". It has two
> flags: NFS4_STID_CLOSED and NFS4_STID_REVOKED.
> CLOSED combines NFS4_CLOSED_STID, NFS4_CLOSED_DELEG_STID, and is used
> for LOCK_STID instead of setting the sc_type to zero.
> These flags are only ever set, never cleared.
> For deleg stateids they are set under the global state_lock.
> For open and lock stateids they are set under ->cl_lock.
>
> Other changes here - some of which could be split out...
>
> - When a delegation is revoked, the type was previously set to
> NFS4_CLOSED_DELEG_STID and then NFS4_REVOKED_DELEG_STID.
> This might open a race window. That window no longer exists.
>
> - NFS4_STID_REVOKED (previously NFS4_REVOKED_DELEG_STID) is only set
> for ->minorversion>0, so I removed all the tests on minorversion when
> that status has been detected.
>
> - nfs4_unhash_stid() has been removed, and we never set sc_type = 0.
> This was only used for LOCK stids and they now use NFS4_STID_CLOSED
>
> - ->cl_lock is now hel when hash_delegation_locked() is called, so
> that the locking rules for setting ->sc_type are followed.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/nfsd/nfs4layouts.c | 4 +-
> fs/nfsd/nfs4state.c | 165 ++++++++++++++++++++----------------------
> fs/nfsd/state.h | 37 +++++++---
> fs/nfsd/trace.h | 25 +++----
> 4 files changed, 119 insertions(+), 112 deletions(-)
>
> diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
> index 5e8096bc5eaa..678bef3a7f15 100644
> --- a/fs/nfsd/nfs4layouts.c
> +++ b/fs/nfsd/nfs4layouts.c
> @@ -269,13 +269,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
> {
> struct nfs4_layout_stateid *ls;
> struct nfs4_stid *stid;
> - unsigned char typemask = NFS4_LAYOUT_STID;
> + unsigned short typemask = NFS4_LAYOUT_STID;
> __be32 status;
>
> if (create)
> typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
>
> - status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
> + status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
> net_generic(SVC_NET(rqstp), nfsd_net_id));
> if (status)
> goto out;
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 65fd5510323a..551a86a796ed 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1265,11 +1265,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
> nfs4_put_stid(&dp->dl_stid);
> }
>
> -void nfs4_unhash_stid(struct nfs4_stid *s)
> -{
> - s->sc_type = 0;
> -}
> -
> /**
> * nfs4_delegation_exists - Discover if this delegation already exists
> * @clp: a pointer to the nfs4_client we're granting a delegation to
> @@ -1317,6 +1312,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
>
> lockdep_assert_held(&state_lock);
> lockdep_assert_held(&fp->fi_lock);
> + lockdep_assert_held(&clp->cl_lock);
>
> if (nfs4_delegation_exists(clp, fp))
> return -EAGAIN;
> @@ -1333,7 +1329,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp)
> }
>
> static bool
> -unhash_delegation_locked(struct nfs4_delegation *dp)
> +unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short state)
> {
> struct nfs4_file *fp = dp->dl_stid.sc_file;
>
> @@ -1342,7 +1338,10 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
> if (!delegation_hashed(dp))
> return false;
>
> - dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
> + if (dp->dl_stid.sc_client->cl_minorversion == 0)
> + state = NFS4_STID_CLOSED;
> + dp->dl_stid.sc_status |= state | NFS4_STID_CLOSED;
> +
> /* Ensure that deleg break won't try to requeue it */
> ++dp->dl_time;
> spin_lock(&fp->fi_lock);
> @@ -1358,7 +1357,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
> bool unhashed;
>
> spin_lock(&state_lock);
> - unhashed = unhash_delegation_locked(dp);
> + unhashed = unhash_delegation_locked(dp, NFS4_STID_CLOSED);
> spin_unlock(&state_lock);
> if (unhashed)
> destroy_unhashed_deleg(dp);
> @@ -1372,9 +1371,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
>
> trace_nfsd_stid_revoke(&dp->dl_stid);
>
> - if (clp->cl_minorversion) {
> + if (dp->dl_stid.sc_status & NFS4_STID_REVOKED) {
> spin_lock(&clp->cl_lock);
> - dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
> refcount_inc(&dp->dl_stid.sc_count);
> list_add(&dp->dl_recall_lru, &clp->cl_revoked);
> spin_unlock(&clp->cl_lock);
> @@ -1382,8 +1380,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
> destroy_unhashed_deleg(dp);
> }
>
> -/*
> - * SETCLIENTID state
> +/*
> + * SETCLIENTID state
> */
>
> static unsigned int clientid_hashval(u32 id)
> @@ -1546,7 +1544,6 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
> if (!unhash_ol_stateid(stp))
> return false;
> list_del_init(&stp->st_locks);
> - nfs4_unhash_stid(&stp->st_stid);
> return true;
> }
>
> @@ -1557,6 +1554,7 @@ static void release_lock_stateid(struct nfs4_ol_stateid *stp)
>
> spin_lock(&clp->cl_lock);
> unhashed = unhash_lock_stateid(stp);
> + stp->st_stid.sc_status |= NFS4_STID_CLOSED;
> spin_unlock(&clp->cl_lock);
> if (unhashed)
> nfs4_put_stid(&stp->st_stid);
> @@ -1625,6 +1623,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
> LIST_HEAD(reaplist);
>
> spin_lock(&stp->st_stid.sc_client->cl_lock);
> + stp->st_stid.sc_status |= NFS4_STID_CLOSED;
> if (unhash_open_stateid(stp, &reaplist))
> put_ol_stateid_locked(stp, &reaplist);
> spin_unlock(&stp->st_stid.sc_client->cl_lock);
> @@ -2233,7 +2232,7 @@ __destroy_client(struct nfs4_client *clp)
> spin_lock(&state_lock);
> while (!list_empty(&clp->cl_delegations)) {
> dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
> - WARN_ON(!unhash_delegation_locked(dp));
> + WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_CLOSED));
> list_add(&dp->dl_recall_lru, &reaplist);
> }
> spin_unlock(&state_lock);
> @@ -2465,14 +2464,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
> }
>
> static struct nfs4_stid *
> -find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
> +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
> + unsigned short typemask, unsigned short ok_states)
> {
> struct nfs4_stid *s;
>
> spin_lock(&cl->cl_lock);
> s = find_stateid_locked(cl, t);
> if (s != NULL) {
> - if (typemask & s->sc_type)
> + if ((s->sc_status & ~ok_states) == 0 &&
> + (typemask & s->sc_type))
> refcount_inc(&s->sc_count);
> else
> s = NULL;
> @@ -4582,7 +4583,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
> continue;
> if (local->st_stateowner != &oo->oo_owner)
> continue;
> - if (local->st_stid.sc_type == NFS4_OPEN_STID) {
> + if (local->st_stid.sc_type == NFS4_OPEN_STID &&
> + !(local->st_stid.sc_status & NFS4_STID_CLOSED)) {
> ret = local;
> refcount_inc(&ret->st_stid.sc_count);
> break;
> @@ -4596,17 +4598,10 @@ nfsd4_verify_open_stid(struct nfs4_stid *s)
> {
> __be32 ret = nfs_ok;
>
> - switch (s->sc_type) {
> - default:
> - break;
> - case 0:
> - case NFS4_CLOSED_STID:
> - case NFS4_CLOSED_DELEG_STID:
> - ret = nfserr_bad_stateid;
> - break;
> - case NFS4_REVOKED_DELEG_STID:
> + if (s->sc_status & NFS4_STID_REVOKED)
> ret = nfserr_deleg_revoked;
> - }
> + else if (s->sc_status & NFS4_STID_CLOSED)
> + ret = nfserr_bad_stateid;
> return ret;
> }
>
> @@ -4919,9 +4914,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
>
> trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
>
> - if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
> - dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
> - return 1;
> + if (dp->dl_stid.sc_status)
> + /* CLOSED or REVOKED */
> + return 1;
>
> switch (task->tk_status) {
> case 0:
> @@ -5170,8 +5165,7 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
> {
> struct nfs4_stid *ret;
>
> - ret = find_stateid_by_type(cl, s,
> - NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
> + ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID, 0);
> if (!ret)
> return NULL;
> return delegstateid(ret);
> @@ -5194,10 +5188,9 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
> deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
> if (deleg == NULL)
> goto out;
> - if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
> + if (deleg->dl_stid.sc_status & NFS4_STID_REVOKED) {
> nfs4_put_stid(&deleg->dl_stid);
> - if (cl->cl_minorversion)
> - status = nfserr_deleg_revoked;
> + status = nfserr_deleg_revoked;
> goto out;
> }
> flags = share_access_to_flags(open->op_share_access);
> @@ -5609,12 +5602,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
> goto out_unlock;
>
> spin_lock(&state_lock);
> + spin_lock(&clp->cl_lock);
> spin_lock(&fp->fi_lock);
> if (fp->fi_had_conflict)
> status = -EAGAIN;
> else
> status = hash_delegation_locked(dp, fp);
> spin_unlock(&fp->fi_lock);
> + spin_unlock(&clp->cl_lock);
> spin_unlock(&state_lock);
>
> if (status)
> @@ -5840,7 +5835,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
> } else {
> status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
> if (status) {
> - stp->st_stid.sc_type = NFS4_CLOSED_STID;
> release_open_stateid(stp);
> mutex_unlock(&stp->st_mutex);
> goto out;
> @@ -6232,7 +6226,7 @@ nfs4_laundromat(struct nfsd_net *nn)
> dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
> if (!state_expired(&lt, dp->dl_time))
> break;
> - WARN_ON(!unhash_delegation_locked(dp));
> + WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_REVOKED));
> list_add(&dp->dl_recall_lru, &reaplist);
> }
> spin_unlock(&state_lock);
> @@ -6471,22 +6465,20 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
> if (status)
> goto out_unlock;
> + status = nfsd4_verify_open_stid(s);
> + if (status)
> + goto out_unlock;
> +
> switch (s->sc_type) {
> case NFS4_DELEG_STID:
> status = nfs_ok;
> break;
> - case NFS4_REVOKED_DELEG_STID:
> - status = nfserr_deleg_revoked;
> - break;
> case NFS4_OPEN_STID:
> case NFS4_LOCK_STID:
> status = nfsd4_check_openowner_confirmed(openlockstateid(s));
> break;
> default:
> printk("unknown stateid type %x\n", s->sc_type);
> - fallthrough;
> - case NFS4_CLOSED_STID:
> - case NFS4_CLOSED_DELEG_STID:
> status = nfserr_bad_stateid;
> }
> out_unlock:
> @@ -6496,7 +6488,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>
> __be32
> nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> + stateid_t *stateid,
> + unsigned short typemask, unsigned short statemask,
> struct nfs4_stid **s, struct nfsd_net *nn)
> {
> __be32 status;
> @@ -6507,10 +6500,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> * only return revoked delegations if explicitly asked.
> * otherwise we report revoked or bad_stateid status.
> */
> - if (typemask & NFS4_REVOKED_DELEG_STID)
> + if (statemask & NFS4_STID_REVOKED)
> return_revoked = true;
> - else if (typemask & NFS4_DELEG_STID)
> - typemask |= NFS4_REVOKED_DELEG_STID;
>
> if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
> CLOSE_STATEID(stateid))
> @@ -6523,14 +6514,13 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> }
> if (status)
> return status;
> - stid = find_stateid_by_type(cstate->clp, stateid, typemask);
> + stid = find_stateid_by_type(cstate->clp, stateid,
> + typemask, statemask & NFS4_STID_CLOSED);
> if (!stid)
> return nfserr_bad_stateid;
> - if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
> + if ((stid->sc_status & NFS4_STID_REVOKED) && !return_revoked) {
> nfs4_put_stid(stid);
> - if (cstate->minorversion)
> - return nfserr_deleg_revoked;
> - return nfserr_bad_stateid;
> + return nfserr_deleg_revoked;
> }
> *s = stid;
> return nfs_ok;
> @@ -6541,7 +6531,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
> {
> struct nfsd_file *ret = NULL;
>
> - if (!s)
> + if (!s || s->sc_status)
> return NULL;
>
> switch (s->sc_type) {
> @@ -6664,7 +6654,8 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
> goto out;
>
> *stid = find_stateid_by_type(found, &cps->cp_p_stateid,
> - NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID);
> + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
> + 0);
> if (*stid)
> status = nfs_ok;
> else
> @@ -6722,7 +6713,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
>
> status = nfsd4_lookup_stateid(cstate, stateid,
> NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
> - &s, nn);
> + 0, &s, nn);
> if (status == nfserr_bad_stateid)
> status = find_cpntf_state(nn, stateid, &s);
> if (status)
> @@ -6740,9 +6731,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> case NFS4_LOCK_STID:
> status = nfs4_check_olstateid(openlockstateid(s), flags);
> break;
> - default:
> - status = nfserr_bad_stateid;
> - break;
> }
> if (status)
> goto out;
> @@ -6821,11 +6809,20 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>
> spin_lock(&cl->cl_lock);
> s = find_stateid_locked(cl, stateid);
> - if (!s)
> + if (!s || s->sc_status & NFS4_STID_CLOSED)
> goto out_unlock;
> spin_lock(&s->sc_lock);
> switch (s->sc_type) {
> case NFS4_DELEG_STID:
> + if (s->sc_status & NFS4_STID_REVOKED) {
> + spin_unlock(&s->sc_lock);
> + dp = delegstateid(s);
> + list_del_init(&dp->dl_recall_lru);
> + spin_unlock(&cl->cl_lock);
> + nfs4_put_stid(s);
> + ret = nfs_ok;
> + goto out;
> + }
> ret = nfserr_locks_held;
> break;
> case NFS4_OPEN_STID:
> @@ -6840,14 +6837,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> spin_unlock(&cl->cl_lock);
> ret = nfsd4_free_lock_stateid(stateid, s);
> goto out;
> - case NFS4_REVOKED_DELEG_STID:
> - spin_unlock(&s->sc_lock);
> - dp = delegstateid(s);
> - list_del_init(&dp->dl_recall_lru);
> - spin_unlock(&cl->cl_lock);
> - nfs4_put_stid(s);
> - ret = nfs_ok;
> - goto out;
> /* Default falls through and returns nfserr_bad_stateid */
> }
> spin_unlock(&s->sc_lock);
> @@ -6890,6 +6879,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> * @seqid: seqid (provided by client)
> * @stateid: stateid (provided by client)
> * @typemask: mask of allowable types for this operation
> + * @statemask: mask of allowed states: 0 or STID_CLOSED
> * @stpp: return pointer for the stateid found
> * @nn: net namespace for request
> *
> @@ -6899,7 +6889,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
> */
> static __be32
> nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> - stateid_t *stateid, char typemask,
> + stateid_t *stateid,
> + unsigned short typemask, unsigned short statemask,
> struct nfs4_ol_stateid **stpp,
> struct nfsd_net *nn)
> {
> @@ -6910,7 +6901,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
> trace_nfsd_preprocess(seqid, stateid);
>
> *stpp = NULL;
> - status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
> + status = nfsd4_lookup_stateid(cstate, stateid,
> + typemask, statemask, &s, nn);
> if (status)
> return status;
> stp = openlockstateid(s);
> @@ -6932,7 +6924,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
> struct nfs4_ol_stateid *stp;
>
> status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
> - NFS4_OPEN_STID, &stp, nn);
> + NFS4_OPEN_STID, 0, &stp, nn);
> if (status)
> return status;
> oo = openowner(stp->st_stateowner);
> @@ -6963,8 +6955,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> return status;
>
> status = nfs4_preprocess_seqid_op(cstate,
> - oc->oc_seqid, &oc->oc_req_stateid,
> - NFS4_OPEN_STID, &stp, nn);
> + oc->oc_seqid, &oc->oc_req_stateid,
> + NFS4_OPEN_STID, 0, &stp, nn);
> if (status)
> goto out;
> oo = openowner(stp->st_stateowner);
> @@ -7094,18 +7086,20 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> struct net *net = SVC_NET(rqstp);
> struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>
> - dprintk("NFSD: nfsd4_close on file %pd\n",
> + dprintk("NFSD: nfsd4_close on file %pd\n",
> cstate->current_fh.fh_dentry);
>
> status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
> - &close->cl_stateid,
> - NFS4_OPEN_STID|NFS4_CLOSED_STID,
> - &stp, nn);
> + &close->cl_stateid,
> + NFS4_OPEN_STID, NFS4_STID_CLOSED,
> + &stp, nn);
> nfsd4_bump_seqid(cstate, status);
> if (status)
> - goto out;
> + goto out;
>
> - stp->st_stid.sc_type = NFS4_CLOSED_STID;
> + spin_lock(&stp->st_stid.sc_client->cl_lock);
> + stp->st_stid.sc_status |= NFS4_STID_CLOSED;
> + spin_unlock(&stp->st_stid.sc_client->cl_lock);
>
> /*
> * Technically we don't _really_ have to increment or copy it, since
> @@ -7147,7 +7141,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
> return status;
>
> - status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
> + status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, 0, &s, nn);
> if (status)
> goto out;
> dp = delegstateid(s);
> @@ -7601,9 +7595,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> &lock_stp, &new);
> } else {
> status = nfs4_preprocess_seqid_op(cstate,
> - lock->lk_old_lock_seqid,
> - &lock->lk_old_lock_stateid,
> - NFS4_LOCK_STID, &lock_stp, nn);
> + lock->lk_old_lock_seqid,
> + &lock->lk_old_lock_stateid,
> + NFS4_LOCK_STID, 0, &lock_stp,
> + nn);
> }
> if (status)
> goto out;
> @@ -7916,8 +7911,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> return nfserr_inval;
>
> status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
> - &locku->lu_stateid, NFS4_LOCK_STID,
> - &stp, nn);
> + &locku->lu_stateid, NFS4_LOCK_STID, 0,
> + &stp, nn);
> if (status)
> goto out;
> nf = find_any_file(stp->st_stid.sc_file);
> @@ -8347,7 +8342,7 @@ nfs4_state_shutdown_net(struct net *net)
> spin_lock(&state_lock);
> list_for_each_safe(pos, next, &nn->del_recall_lru) {
> dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
> - WARN_ON(!unhash_delegation_locked(dp));
> + WARN_ON(!unhash_delegation_locked(dp, NFS4_STID_CLOSED));
> list_add(&dp->dl_recall_lru, &reaplist);
> }
> spin_unlock(&state_lock);
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index f96eaa8e9413..cf89fb6be9e1 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -88,17 +88,31 @@ struct nfsd4_callback_ops {
> */
> struct nfs4_stid {
> refcount_t sc_count;
> -#define NFS4_OPEN_STID 1
> -#define NFS4_LOCK_STID 2
> -#define NFS4_DELEG_STID 4
> -/* For an open stateid kept around *only* to process close replays: */
> -#define NFS4_CLOSED_STID 8
> +
> + /* A new stateid is added to the idr early before it is
> + * fully initialised. Its sc_type is then zero.
> + * After initialisation the sc_type it set under cl_lock,
> + * and then never changes.
> + */
> +#define NFS4_OPEN_STID BIT(0)
> +#define NFS4_LOCK_STID BIT(1)
> +#define NFS4_DELEG_STID BIT(2)
> +#define NFS4_LAYOUT_STID BIT(3)
> + unsigned short sc_type;
> +/* state_lock protects sc_status for delegation stateids.
> + * ->cl_lock protects sc_status for open and lock stateids.
> + * ->st_mutex also protect sc_status for open stateids.
> + */
> +/*
> + * For an open stateid kept around *only* to process close replays.
> + * For deleg stateid, kept in idr until last reference is dropped.
> + */
> +#define NFS4_STID_CLOSED BIT(0)
> /* For a deleg stateid kept around only to process free_stateid's: */
> -#define NFS4_REVOKED_DELEG_STID 16
> -#define NFS4_CLOSED_DELEG_STID 32
> -#define NFS4_LAYOUT_STID 64
> +#define NFS4_STID_REVOKED BIT(1)
> + unsigned short sc_status;
> +
> struct list_head sc_cp_list;
> - unsigned char sc_type;
> stateid_t sc_stateid;
> spinlock_t sc_lock;
> struct nfs4_client *sc_client;
> @@ -694,8 +708,9 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
> stateid_t *stateid, int flags, struct nfsd_file **filp,
> struct nfs4_stid **cstid);
> __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> - stateid_t *stateid, unsigned char typemask,
> - struct nfs4_stid **s, struct nfsd_net *nn);
> + stateid_t *stateid, unsigned short typemask,
> + unsigned short statemask,
> + struct nfs4_stid **s, struct nfsd_net *nn);
> struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
> void (*sc_free)(struct nfs4_stid *));
> int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index fbc0ccb40424..668b352faaea 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -641,31 +641,26 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
> DEFINE_STATESEQID_EVENT(preprocess);
> DEFINE_STATESEQID_EVENT(open_confirm);
>
> -TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
> -TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
> -TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
> -TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
> -TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
> -TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
> -TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
> -
> #define show_stid_type(x) \
> __print_flags(x, "|", \
> { NFS4_OPEN_STID, "OPEN" }, \
> { NFS4_LOCK_STID, "LOCK" }, \
> { NFS4_DELEG_STID, "DELEG" }, \
> - { NFS4_CLOSED_STID, "CLOSED" }, \
> - { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
> - { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
> { NFS4_LAYOUT_STID, "LAYOUT" })
>
> +#define show_stid_status(x) \
> + __print_flags(x, "|", \
> + { NFS4_STID_CLOSED, "CLOSED" }, \
> + { NFS4_STID_REVOKED, "REVOKED" }) \
> +
> DECLARE_EVENT_CLASS(nfsd_stid_class,
> TP_PROTO(
> const struct nfs4_stid *stid
> ),
> TP_ARGS(stid),
> TP_STRUCT__entry(
> - __field(unsigned long, sc_type)
> + __field(unsigned short, sc_type)
> + __field(unsigned short, sc_status)
> __field(int, sc_count)
> __field(u32, cl_boot)
> __field(u32, cl_id)
> @@ -676,16 +671,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
> const stateid_t *stp = &stid->sc_stateid;
>
> __entry->sc_type = stid->sc_type;
> + __entry->sc_status = stid->sc_status;
> __entry->sc_count = refcount_read(&stid->sc_count);
> __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
> __entry->cl_id = stp->si_opaque.so_clid.cl_id;
> __entry->si_id = stp->si_opaque.so_id;
> __entry->si_generation = stp->si_generation;
> ),
> - TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
> + TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
> __entry->cl_boot, __entry->cl_id,
> __entry->si_id, __entry->si_generation,
> - __entry->sc_count, show_stid_type(__entry->sc_type)
> + __entry->sc_count, show_stid_type(__entry->sc_type),
> + show_stid_status(__entry->sc_status)
> )
> );
>

I like it, overall! This fixes several ugly warts in the sc_type
handling.

I needed the patch below to get it to compile, but it seems to be
working OK so far. The patch may not be quite right, but I'm not testing
pnfs at the moment.
--
Jeff Layton <[email protected]>

-----------------8<-----------------------

[PATCH] SQUASH: nfs4_unhash_stid

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4layouts.c | 1 -
fs/nfsd/state.h | 1 -
2 files changed, 2 deletions(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 678bef3a7f15..6c8263741e01 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -518,7 +518,6 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
lrp->lrs_present = true;
} else {
trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
- nfs4_unhash_stid(&ls->ls_stid);
lrp->lrs_present = false;
}
spin_unlock(&ls->ls_lock);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index cf89fb6be9e1..d81314a9b33a 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -717,7 +717,6 @@ int nfs4_init_copy_state(struct nfsd_net *nn, struct
nfsd4_copy *copy);
void nfs4_free_copy_state(struct nfsd4_copy *copy);
struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net
*nn,
struct nfs4_stid *p_stid);
-void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct
nfsd_net *);
--
2.41.0