NFSD: add support for NFSv4 write delegation
The NFSv4 server currently supports read delegation using VFS lease
which is implemented using file_lock.
This patch series add write delegation support for NFSv4 server by:
. remove the check for F_WRLCK in generic_add_lease to allow
file_lock to be used for write delegation.
. grant write delegation for OPEN with NFS4_SHARE_ACCESS_WRITE
if there is no conflict with other OPENs.
Write delegation conflict with another OPEN, REMOVE, RENAME and SETATTR
are handled the same as read delegation using notify_change, try_break_deleg.
Changes since v1:
[PATCH 3/4] NFSD: add supports for CB_GETATTR callback
- remove WARN_ON_ONCE from encode_bitmap4
- replace decode_bitmap4 with xdr_stream_decode_uint32_array
- replace xdr_inline_decode and xdr_decode_hyper in decode_cb_getattr
with xdr_stream_decode_u64. Also remove the un-needed likely().
- modify signature of encode_cb_getattr4args to take pointer to
nfs4_cb_fattr
- replace decode_attr_length with xdr_stream_decode_u32
- rename decode_cb_getattr to decode_cb_fattr4
- fold the initialization of cb_cinfo and cb_fsize into decode_cb_fattr4
- rename ncf_cb_cinfo to ncf_cb_change to avoid confusion of cindo usage
in fs/nfsd/nfs4xdr.c
- correct NFS4_dec_cb_getattr_sz and update size description
[PATCH 4/4] NFSD: handle GETATTR conflict with write delegation
- change nfs4_handle_wrdeleg_conflict returns __be32 to fix test robot
- change ncf_cb_cinfo to ncf_cb_change to avoid confusion of cindo usage
in fs/nfsd/nfs4xdr.c
Changes since v2:
[PATCH 2/4] NFSD: enable support for write delegation
- rename 'deleg' to 'dl_type' in nfs4_set_delegation
- remove 'wdeleg' in nfs4_open_delegation
- drop [PATCH 3/4] NFSD: add supports for CB_GETATTR callback
and [PATCH 4/4] NFSD: handle GETATTR conflict with write delegation
for futher clarification of the benefits of these patches
Changes since v3:
- recall write delegation when there is GETATTR from 2nd client
- add trace point to track when write delegation is granted
Remove the check for F_WRLCK in generic_add_lease to allow file_lock
to be used for write delegation.
First consumer is NFSD.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/locks.c | 7 -------
1 file changed, 7 deletions(-)
diff --git a/fs/locks.c b/fs/locks.c
index df8b26a42524..08fb0b4fd4f8 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1729,13 +1729,6 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
if (is_deleg && !inode_trylock(inode))
return -EAGAIN;
- if (is_deleg && arg == F_WRLCK) {
- /* Write delegations are not currently supported: */
- inode_unlock(inode);
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
-
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
--
2.9.5
If the GETATTR request on a file that has write delegation in effect
and the request attributes include the change info and size attribute
then the write delegation is recalled and NFS4ERR_DELAY is returned
for the GETATTR.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..e069b970f136 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
return nfserr_resource;
}
+static struct file_lock *
+nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
+{
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return NULL;
+ spin_lock(&ctx->flc_lock);
+ if (!list_empty(&ctx->flc_lease)) {
+ fl = list_first_entry(&ctx->flc_lease,
+ struct file_lock, fl_list);
+ if (fl->fl_type == F_WRLCK) {
+ spin_unlock(&ctx->flc_lock);
+ return fl;
+ }
+ }
+ spin_unlock(&ctx->flc_lock);
+ return NULL;
+}
+
+static __be32
+nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
+{
+ __be32 status;
+ struct file_lock *fl;
+ struct nfs4_delegation *dp;
+
+ fl = nfs4_wrdeleg_filelock(rqstp, inode);
+ if (!fl)
+ return 0;
+ dp = fl->fl_owner;
+ if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
+ return 0;
+ refcount_inc(&dp->dl_stid.sc_count);
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ return status;
+}
+
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
* ourselves.
@@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
if (status)
goto out;
}
+ if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
+ if (status)
+ goto out;
+ }
err = vfs_getattr(&path, &stat,
STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
--
2.9.5
This patch grants write delegation for OPEN with NFS4_SHARE_ACCESS_WRITE
if there is no conflict with other OPENs.
Write delegation conflict with another OPEN, REMOVE, RENAME and SETATTR
are handled the same as read delegation using notify_change,
try_break_deleg.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6e61fa3acaf1..3f98b7485c72 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1144,7 +1144,7 @@ static void block_delegations(struct knfsd_fh *fh)
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
- struct nfs4_clnt_odstate *odstate)
+ struct nfs4_clnt_odstate *odstate, u32 dl_type)
{
struct nfs4_delegation *dp;
long n;
@@ -1170,7 +1170,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_clnt_odstate = odstate;
get_clnt_odstate(odstate);
- dp->dl_type = NFS4_OPEN_DELEGATE_READ;
+ dp->dl_type = dl_type;
dp->dl_retries = 1;
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -5451,6 +5451,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct nfs4_delegation *dp;
struct nfsd_file *nf;
struct file_lock *fl;
+ u32 dl_type;
/*
* The fi_had_conflict and nfs_get_existing_delegation checks
@@ -5460,7 +5461,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- nf = find_readable_file(fp);
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ nf = find_writeable_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_WRITE;
+ } else {
+ nf = find_readable_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_READ;
+ }
if (!nf) {
/*
* We probably could attempt another open and get a read
@@ -5491,11 +5498,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
return ERR_PTR(status);
status = -ENOMEM;
- dp = alloc_init_deleg(clp, fp, odstate);
+ dp = alloc_init_deleg(clp, fp, odstate, dl_type);
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+ fl = nfs4_alloc_init_lease(dp, dl_type);
if (!fl)
goto out_clnt_odstate;
@@ -5590,8 +5597,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!cb_up)
open->op_recall = 1;
- if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
- goto out_no_deleg;
break;
case NFS4_OPEN_CLAIM_NULL:
parent = currentfh;
@@ -5617,7 +5622,10 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
+ else
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
--
2.9.5
Add trace point to track whether read or write delegation is granted.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 8 +++++---
fs/nfsd/trace.h | 1 +
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3f98b7485c72..b90b74a5e66e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5621,11 +5621,13 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
- trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
- if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
- else
+ trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ } else {
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
+ }
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 4183819ea082..a14cf8684255 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -607,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);
DEFINE_STATEID_EVENT(open);
DEFINE_STATEID_EVENT(deleg_read);
+DEFINE_STATEID_EVENT(deleg_write);
DEFINE_STATEID_EVENT(deleg_return);
DEFINE_STATEID_EVENT(deleg_recall);
--
2.9.5
> On May 20, 2023, at 5:36 PM, Dai Ngo <[email protected]> wrote:
>
> NFSD: add support for NFSv4 write delegation
>
> The NFSv4 server currently supports read delegation using VFS lease
> which is implemented using file_lock.
>
> This patch series add write delegation support for NFSv4 server by:
>
> . remove the check for F_WRLCK in generic_add_lease to allow
> file_lock to be used for write delegation.
>
> . grant write delegation for OPEN with NFS4_SHARE_ACCESS_WRITE
> if there is no conflict with other OPENs.
>
> Write delegation conflict with another OPEN, REMOVE, RENAME and SETATTR
> are handled the same as read delegation using notify_change, try_break_deleg.
>
> Changes since v1:
>
> [PATCH 3/4] NFSD: add supports for CB_GETATTR callback
> - remove WARN_ON_ONCE from encode_bitmap4
> - replace decode_bitmap4 with xdr_stream_decode_uint32_array
> - replace xdr_inline_decode and xdr_decode_hyper in decode_cb_getattr
> with xdr_stream_decode_u64. Also remove the un-needed likely().
> - modify signature of encode_cb_getattr4args to take pointer to
> nfs4_cb_fattr
> - replace decode_attr_length with xdr_stream_decode_u32
> - rename decode_cb_getattr to decode_cb_fattr4
> - fold the initialization of cb_cinfo and cb_fsize into decode_cb_fattr4
> - rename ncf_cb_cinfo to ncf_cb_change to avoid confusion of cindo usage
> in fs/nfsd/nfs4xdr.c
> - correct NFS4_dec_cb_getattr_sz and update size description
>
> [PATCH 4/4] NFSD: handle GETATTR conflict with write delegation
> - change nfs4_handle_wrdeleg_conflict returns __be32 to fix test robot
> - change ncf_cb_cinfo to ncf_cb_change to avoid confusion of cindo usage
> in fs/nfsd/nfs4xdr.c
>
> Changes since v2:
>
> [PATCH 2/4] NFSD: enable support for write delegation
> - rename 'deleg' to 'dl_type' in nfs4_set_delegation
> - remove 'wdeleg' in nfs4_open_delegation
>
> - drop [PATCH 3/4] NFSD: add supports for CB_GETATTR callback
> and [PATCH 4/4] NFSD: handle GETATTR conflict with write delegation
> for futher clarification of the benefits of these patches
>
> Changes since v3:
>
> - recall write delegation when there is GETATTR from 2nd client
> - add trace point to track when write delegation is granted
>
I'll apply this series to nfsd-next. There are a handful of
changes I'd like to make (with permission):
- Squash 4/4 into 2/4
- Apply 1/4 last instead of first
- Add Jeff's Reviewed-by at least to 1/4 and 2/4
3/4 gives us a platform for measuring recalls triggered by
GETATTR. It might not make any difference if we add a new
counter for that -- but a tracepoint could also do it without
altering the kernel/userspace API. I'm still pondering it.
--
Chuck Lever
On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 45 insertions(+)
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..e069b970f136 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> return nfserr_resource;
> }
>
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + struct file_lock_context *ctx;
> + struct file_lock *fl;
> +
> + ctx = locks_inode_context(inode);
> + if (!ctx)
> + return NULL;
> + spin_lock(&ctx->flc_lock);
> + if (!list_empty(&ctx->flc_lease)) {
> + fl = list_first_entry(&ctx->flc_lease,
> + struct file_lock, fl_list);
> + if (fl->fl_type == F_WRLCK) {
> + spin_unlock(&ctx->flc_lock);
> + return fl;
> + }
> + }
> + spin_unlock(&ctx->flc_lock);
> + return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + __be32 status;
> + struct file_lock *fl;
> + struct nfs4_delegation *dp;
> +
> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> + if (!fl)
> + return 0;
> + dp = fl->fl_owner;
One problem here is that you don't know whether the owner was set by
nfsd. This owner could be a struct file from a userland lease holder,
and that that point it's not safe to dereference it below like you are.
The q&d way we check for this in other places is to validate that the
fl_lmops is correct. In this case you'd want to make sure it's set to
&nfsd_lease_mng_ops.
Beyond that, you also don't know whether that owner or file_lock still
_exists_ after you drop the flc_lock. You need to either do these checks
while holding that lock, or take a reference to the owner before you
start dereferencing things.
Probably, you're better off here just doing it all under the flc_lock.
> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> + return 0;
> + refcount_inc(&dp->dl_stid.sc_count);
Another problem: the sc_count might already have gone to zero here. You
don't already hold a reference to dl_stid at this point, so you can't
just increment it without taking the cl_lock for that client.
You may be able to do this safely with refcount_inc_not_zero, and just
ignore the delegation if it's already at zero.
> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> + return status;
> +}
> +
> /*
> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> * ourselves.
> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> if (status)
> goto out;
> }
> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> + if (status)
> + goto out;
> + }
>
> err = vfs_getattr(&path, &stat,
> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
--
Jeff Layton <[email protected]>
On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> Remove the check for F_WRLCK in generic_add_lease to allow file_lock
> to be used for write delegation.
>
> First consumer is NFSD.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/locks.c | 7 -------
> 1 file changed, 7 deletions(-)
>
> diff --git a/fs/locks.c b/fs/locks.c
> index df8b26a42524..08fb0b4fd4f8 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -1729,13 +1729,6 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
> if (is_deleg && !inode_trylock(inode))
> return -EAGAIN;
>
> - if (is_deleg && arg == F_WRLCK) {
> - /* Write delegations are not currently supported: */
> - inode_unlock(inode);
> - WARN_ON_ONCE(1);
> - return -EINVAL;
> - }
> -
> percpu_down_read(&file_rwsem);
> spin_lock(&ctx->flc_lock);
> time_out_leases(inode, &dispose);
Reviewed-by: Jeff Layton <[email protected]>
> On May 20, 2023, at 5:36 PM, Dai Ngo <[email protected]> wrote:
>
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 45 insertions(+)
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..e069b970f136 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> return nfserr_resource;
> }
>
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + struct file_lock_context *ctx;
> + struct file_lock *fl;
> +
> + ctx = locks_inode_context(inode);
> + if (!ctx)
> + return NULL;
> + spin_lock(&ctx->flc_lock);
> + if (!list_empty(&ctx->flc_lease)) {
> + fl = list_first_entry(&ctx->flc_lease,
> + struct file_lock, fl_list);
> + if (fl->fl_type == F_WRLCK) {
> + spin_unlock(&ctx->flc_lock);
> + return fl;
> + }
> + }
> + spin_unlock(&ctx->flc_lock);
> + return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + __be32 status;
> + struct file_lock *fl;
> + struct nfs4_delegation *dp;
> +
> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> + if (!fl)
> + return 0;
> + dp = fl->fl_owner;
> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> + return 0;
> + refcount_inc(&dp->dl_stid.sc_count);
> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> + return status;
> +}
> +
fs/nfsd/nfs4state.c seems more appropriate for these.
I'll move them as I apply this patch.
> /*
> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> * ourselves.
> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> if (status)
> goto out;
> }
> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> + if (status)
> + goto out;
> + }
>
> err = vfs_getattr(&path, &stat,
> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> --
> 2.9.5
>
--
Chuck Lever
On 5/21/23 9:49 AM, Jeff Layton wrote:
> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>> for the GETATTR.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 45 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..e069b970f136 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>> return nfserr_resource;
>> }
>>
>> +static struct file_lock *
>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> + struct file_lock_context *ctx;
>> + struct file_lock *fl;
>> +
>> + ctx = locks_inode_context(inode);
>> + if (!ctx)
>> + return NULL;
>> + spin_lock(&ctx->flc_lock);
>> + if (!list_empty(&ctx->flc_lease)) {
>> + fl = list_first_entry(&ctx->flc_lease,
>> + struct file_lock, fl_list);
>> + if (fl->fl_type == F_WRLCK) {
>> + spin_unlock(&ctx->flc_lock);
>> + return fl;
>> + }
>> + }
>> + spin_unlock(&ctx->flc_lock);
>> + return NULL;
>> +}
>> +
>> +static __be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> + __be32 status;
>> + struct file_lock *fl;
>> + struct nfs4_delegation *dp;
>> +
>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>> + if (!fl)
>> + return 0;
>> + dp = fl->fl_owner;
> One problem here is that you don't know whether the owner was set by
> nfsd. This owner could be a struct file from a userland lease holder,
> and that that point it's not safe to dereference it below like you are.
>
> The q&d way we check for this in other places is to validate that the
> fl_lmops is correct. In this case you'd want to make sure it's set to
> &nfsd_lease_mng_ops.
Thanks Jeff, fix in v5.
>
> Beyond that, you also don't know whether that owner or file_lock still
> _exists_ after you drop the flc_lock. You need to either do these checks
> while holding that lock, or take a reference to the owner before you
> start dereferencing things.
>
> Probably, you're better off here just doing it all under the flc_lock.
fix in v5.
>
>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>> + return 0;
>> + refcount_inc(&dp->dl_stid.sc_count);
> Another problem: the sc_count might already have gone to zero here. You
> don't already hold a reference to dl_stid at this point, so you can't
> just increment it without taking the cl_lock for that client.
>
> You may be able to do this safely with refcount_inc_not_zero, and just
> ignore the delegation if it's already at zero.
Fix in v5.
Chuck, I can do v5 to address feedback from you and Jeff.
Thanks,
-Dai
>
>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> + return status;
>> +}
>> +
>> /*
>> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>> * ourselves.
>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> if (status)
>> goto out;
>> }
>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>> + if (status)
>> + goto out;
>> + }
>>
>> err = vfs_getattr(&path, &stat,
>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> If the GETATTR request on a file that has write delegation in effect
> and the request attributes include the change info and size attribute
> then the write delegation is recalled and NFS4ERR_DELAY is returned
> for the GETATTR.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 45 insertions(+)
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 76db2fe29624..e069b970f136 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> return nfserr_resource;
> }
>
> +static struct file_lock *
> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + struct file_lock_context *ctx;
> + struct file_lock *fl;
> +
> + ctx = locks_inode_context(inode);
> + if (!ctx)
> + return NULL;
> + spin_lock(&ctx->flc_lock);
> + if (!list_empty(&ctx->flc_lease)) {
> + fl = list_first_entry(&ctx->flc_lease,
> + struct file_lock, fl_list);
> + if (fl->fl_type == F_WRLCK) {
> + spin_unlock(&ctx->flc_lock);
> + return fl;
> + }
> + }
> + spin_unlock(&ctx->flc_lock);
> + return NULL;
> +}
> +
> +static __be32
> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> +{
> + __be32 status;
> + struct file_lock *fl;
> + struct nfs4_delegation *dp;
> +
> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> + if (!fl)
> + return 0;
> + dp = fl->fl_owner;
> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> + return 0;
> + refcount_inc(&dp->dl_stid.sc_count);
Another question: Why are you taking a reference here at all? AFAICT,
you don't even look at the delegation again after that point, so it's
not clear to me who's responsible for putting that reference.
> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> + return status;
> +}
> +
> /*
> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> * ourselves.
> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> if (status)
> goto out;
> }
> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> + if (status)
> + goto out;
> + }
>
> err = vfs_getattr(&path, &stat,
> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
--
Jeff Layton <[email protected]>
On Sun, May 21, 2023 at 07:08:43PM -0400, Jeff Layton wrote:
> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> > If the GETATTR request on a file that has write delegation in effect
> > and the request attributes include the change info and size attribute
> > then the write delegation is recalled and NFS4ERR_DELAY is returned
> > for the GETATTR.
> >
> > Signed-off-by: Dai Ngo <[email protected]>
> > ---
> > fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 45 insertions(+)
> >
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 76db2fe29624..e069b970f136 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
> > return nfserr_resource;
> > }
> >
> > +static struct file_lock *
> > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> > +{
> > + struct file_lock_context *ctx;
> > + struct file_lock *fl;
> > +
> > + ctx = locks_inode_context(inode);
> > + if (!ctx)
> > + return NULL;
> > + spin_lock(&ctx->flc_lock);
> > + if (!list_empty(&ctx->flc_lease)) {
> > + fl = list_first_entry(&ctx->flc_lease,
> > + struct file_lock, fl_list);
> > + if (fl->fl_type == F_WRLCK) {
> > + spin_unlock(&ctx->flc_lock);
> > + return fl;
> > + }
> > + }
> > + spin_unlock(&ctx->flc_lock);
> > + return NULL;
> > +}
> > +
> > +static __be32
> > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
> > +{
> > + __be32 status;
> > + struct file_lock *fl;
> > + struct nfs4_delegation *dp;
> > +
> > + fl = nfs4_wrdeleg_filelock(rqstp, inode);
> > + if (!fl)
> > + return 0;
> > + dp = fl->fl_owner;
> > + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> > + return 0;
> > + refcount_inc(&dp->dl_stid.sc_count);
>
> Another question: Why are you taking a reference here at all? AFAICT,
> you don't even look at the delegation again after that point, so it's
> not clear to me who's responsible for putting that reference.
I had the same thought, but I assumed I was missing something.
> > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
> > + return status;
> > +}
> > +
> > /*
> > * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
> > * ourselves.
> > @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> > if (status)
> > goto out;
> > }
> > + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
> > + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
> > + if (status)
> > + goto out;
> > + }
> >
> > err = vfs_getattr(&path, &stat,
> > STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
>
> --
> Jeff Layton <[email protected]>
On 5/21/23 4:08 PM, Jeff Layton wrote:
> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>> If the GETATTR request on a file that has write delegation in effect
>> and the request attributes include the change info and size attribute
>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>> for the GETATTR.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 45 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>> index 76db2fe29624..e069b970f136 100644
>> --- a/fs/nfsd/nfs4xdr.c
>> +++ b/fs/nfsd/nfs4xdr.c
>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>> return nfserr_resource;
>> }
>>
>> +static struct file_lock *
>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> + struct file_lock_context *ctx;
>> + struct file_lock *fl;
>> +
>> + ctx = locks_inode_context(inode);
>> + if (!ctx)
>> + return NULL;
>> + spin_lock(&ctx->flc_lock);
>> + if (!list_empty(&ctx->flc_lease)) {
>> + fl = list_first_entry(&ctx->flc_lease,
>> + struct file_lock, fl_list);
>> + if (fl->fl_type == F_WRLCK) {
>> + spin_unlock(&ctx->flc_lock);
>> + return fl;
>> + }
>> + }
>> + spin_unlock(&ctx->flc_lock);
>> + return NULL;
>> +}
>> +
>> +static __be32
>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>> +{
>> + __be32 status;
>> + struct file_lock *fl;
>> + struct nfs4_delegation *dp;
>> +
>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>> + if (!fl)
>> + return 0;
>> + dp = fl->fl_owner;
>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>> + return 0;
>> + refcount_inc(&dp->dl_stid.sc_count);
> Another question: Why are you taking a reference here at all?
This is same as in nfsd_break_one_deleg and revoke_delegation.
I think it is to prevent the delegation to be freed while delegation
is being recalled.
> AFAICT,
> you don't even look at the delegation again after that point, so it's
> not clear to me who's responsible for putting that reference.
In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
in V4. I'll add it back in v5.
Thanks,
-Dai
>
>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>> + return status;
>> +}
>> +
>> /*
>> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>> * ourselves.
>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>> if (status)
>> goto out;
>> }
>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>> + if (status)
>> + goto out;
>> + }
>>
>> err = vfs_getattr(&path, &stat,
>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
On 5/21/23 7:56 PM, [email protected] wrote:
>
> On 5/21/23 4:08 PM, Jeff Layton wrote:
>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>>> If the GETATTR request on a file that has write delegation in effect
>>> and the request attributes include the change info and size attribute
>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>> for the GETATTR.
>>>
>>> Signed-off-by: Dai Ngo <[email protected]>
>>> ---
>>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 45 insertions(+)
>>>
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 76db2fe29624..e069b970f136 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr,
>>> u32 bmval0, u32 bmval1, u32 bmval2)
>>> return nfserr_resource;
>>> }
>>> +static struct file_lock *
>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>>> +{
>>> + struct file_lock_context *ctx;
>>> + struct file_lock *fl;
>>> +
>>> + ctx = locks_inode_context(inode);
>>> + if (!ctx)
>>> + return NULL;
>>> + spin_lock(&ctx->flc_lock);
>>> + if (!list_empty(&ctx->flc_lease)) {
>>> + fl = list_first_entry(&ctx->flc_lease,
>>> + struct file_lock, fl_list);
>>> + if (fl->fl_type == F_WRLCK) {
>>> + spin_unlock(&ctx->flc_lock);
>>> + return fl;
>>> + }
>>> + }
>>> + spin_unlock(&ctx->flc_lock);
>>> + return NULL;
>>> +}
>>> +
>>> +static __be32
>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode
>>> *inode)
>>> +{
>>> + __be32 status;
>>> + struct file_lock *fl;
>>> + struct nfs4_delegation *dp;
>>> +
>>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>>> + if (!fl)
>>> + return 0;
>>> + dp = fl->fl_owner;
>>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>>> + return 0;
>>> + refcount_inc(&dp->dl_stid.sc_count);
>> Another question: Why are you taking a reference here at all?
>
> This is same as in nfsd_break_one_deleg and revoke_delegation.
> I think it is to prevent the delegation to be freed while delegation
> is being recalled.
>
>> AFAICT,
>> you don't even look at the delegation again after that point, so it's
>> not clear to me who's responsible for putting that reference.
>
> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
> in V4. I'll add it back in v5.
Actually the refcount is decremented after the CB_RECALL is done
by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
decrement it here. This is to prevent the delegation to be free
while the recall is being sent.
-Dai
>
> Thanks,
> -Dai
>
>>
>>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>>> + return status;
>>> +}
>>> +
>>> /*
>>> * Note: @fhp can be NULL; in this case, we might have to compose
>>> the filehandle
>>> * ourselves.
>>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr,
>>> struct svc_fh *fhp,
>>> if (status)
>>> goto out;
>>> }
>>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>>> + if (status)
>>> + goto out;
>>> + }
>>> err = vfs_getattr(&path, &stat,
>>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
> On May 21, 2023, at 11:56 PM, Dai Ngo <[email protected]> wrote:
>
>
> On 5/21/23 7:56 PM, [email protected] wrote:
>>
>> On 5/21/23 4:08 PM, Jeff Layton wrote:
>>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>>>> If the GETATTR request on a file that has write delegation in effect
>>>> and the request attributes include the change info and size attribute
>>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>>> for the GETATTR.
>>>>
>>>> Signed-off-by: Dai Ngo <[email protected]>
>>>> ---
>>>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>> 1 file changed, 45 insertions(+)
>>>>
>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>> index 76db2fe29624..e069b970f136 100644
>>>> --- a/fs/nfsd/nfs4xdr.c
>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
>>>> return nfserr_resource;
>>>> }
>>>> +static struct file_lock *
>>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>>>> +{
>>>> + struct file_lock_context *ctx;
>>>> + struct file_lock *fl;
>>>> +
>>>> + ctx = locks_inode_context(inode);
>>>> + if (!ctx)
>>>> + return NULL;
>>>> + spin_lock(&ctx->flc_lock);
>>>> + if (!list_empty(&ctx->flc_lease)) {
>>>> + fl = list_first_entry(&ctx->flc_lease,
>>>> + struct file_lock, fl_list);
>>>> + if (fl->fl_type == F_WRLCK) {
>>>> + spin_unlock(&ctx->flc_lock);
>>>> + return fl;
>>>> + }
>>>> + }
>>>> + spin_unlock(&ctx->flc_lock);
>>>> + return NULL;
>>>> +}
>>>> +
>>>> +static __be32
>>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode)
>>>> +{
>>>> + __be32 status;
>>>> + struct file_lock *fl;
>>>> + struct nfs4_delegation *dp;
>>>> +
>>>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>>>> + if (!fl)
>>>> + return 0;
>>>> + dp = fl->fl_owner;
>>>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>>>> + return 0;
>>>> + refcount_inc(&dp->dl_stid.sc_count);
>>> Another question: Why are you taking a reference here at all?
>>
>> This is same as in nfsd_break_one_deleg and revoke_delegation.
>> I think it is to prevent the delegation to be freed while delegation
>> is being recalled.
>>
>>> AFAICT,
>>> you don't even look at the delegation again after that point, so it's
>>> not clear to me who's responsible for putting that reference.
>>
>> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
>> in V4. I'll add it back in v5.
>
> Actually the refcount is decremented after the CB_RECALL is done
> by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
> decrement it here. This is to prevent the delegation to be free
> while the recall is being sent.
For v5, please add this good information to the documenting comment
for this function.
> -Dai
>
>>
>> Thanks,
>> -Dai
>>
>>>
>>>> + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
>>>> + return status;
>>>> +}
>>>> +
>>>> /*
>>>> * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
>>>> * ourselves.
>>>> @@ -2966,6 +3006,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
>>>> if (status)
>>>> goto out;
>>>> }
>>>> + if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
>>>> + status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry));
>>>> + if (status)
>>>> + goto out;
>>>> + }
>>>> err = vfs_getattr(&path, &stat,
>>>> STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
--
Chuck Lever
On Sun, 2023-05-21 at 20:56 -0700, [email protected] wrote:
> On 5/21/23 7:56 PM, [email protected] wrote:
> >
> > On 5/21/23 4:08 PM, Jeff Layton wrote:
> > > On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
> > > > If the GETATTR request on a file that has write delegation in effect
> > > > and the request attributes include the change info and size attribute
> > > > then the write delegation is recalled and NFS4ERR_DELAY is returned
> > > > for the GETATTR.
> > > >
> > > > Signed-off-by: Dai Ngo <[email protected]>
> > > > ---
> > > > ? fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> > > > ? 1 file changed, 45 insertions(+)
> > > >
> > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > > index 76db2fe29624..e069b970f136 100644
> > > > --- a/fs/nfsd/nfs4xdr.c
> > > > +++ b/fs/nfsd/nfs4xdr.c
> > > > @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr,
> > > > u32 bmval0, u32 bmval1, u32 bmval2)
> > > > ????? return nfserr_resource;
> > > > ? }
> > > > ? +static struct file_lock *
> > > > +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
> > > > +{
> > > > +??? struct file_lock_context *ctx;
> > > > +??? struct file_lock *fl;
> > > > +
> > > > +??? ctx = locks_inode_context(inode);
> > > > +??? if (!ctx)
> > > > +??????? return NULL;
> > > > +??? spin_lock(&ctx->flc_lock);
> > > > +??? if (!list_empty(&ctx->flc_lease)) {
> > > > +??????? fl = list_first_entry(&ctx->flc_lease,
> > > > +??????????????????? struct file_lock, fl_list);
> > > > +??????? if (fl->fl_type == F_WRLCK) {
> > > > +??????????? spin_unlock(&ctx->flc_lock);
> > > > +??????????? return fl;
> > > > +??????? }
One more issue here too. FL_LAYOUT file_locks live on this list too.
They shouldn't conflict with leases or delegations, so you probably just
want to skip them.
Longer term, I wonder if we ought to add a new list in the
file_lock_context for layouts? There's no reason to keep them all on the
same list.
> > > > +??? }
> > > > +??? spin_unlock(&ctx->flc_lock);
> > > > +??? return NULL;
> > > > +}
> > > > +
> > > > +static __be32
> > > > +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode
> > > > *inode)
> > > > +{
> > > > +??? __be32 status;
> > > > +??? struct file_lock *fl;
> > > > +??? struct nfs4_delegation *dp;
> > > > +
> > > > +??? fl = nfs4_wrdeleg_filelock(rqstp, inode);
> > > > +??? if (!fl)
> > > > +??????? return 0;
> > > > +??? dp = fl->fl_owner;
> > > > +??? if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
> > > > +??????? return 0;
> > > > +??? refcount_inc(&dp->dl_stid.sc_count);
> > > Another question: Why are you taking a reference here at all?
> >
> > This is same as in nfsd_break_one_deleg and revoke_delegation.
> > I think it is to prevent the delegation to be freed while delegation
> > is being recalled.
> >
> > > ? AFAICT,
> > > you don't even look at the delegation again after that point, so it's
> > > not clear to me who's responsible for putting that reference.
> >
> > In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
> > in V4. I'll add it back in v5.
>
> Actually the refcount is decremented after the CB_RECALL is done
> by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
> decrement it here. This is to prevent the delegation to be free
> while the recall is being sent.
>
That's the put for the increment in nfsd_break_one_deleg.
You don't need to take an extra reference to a delegation to call
nfsd_open_break_lease. You might not even know which delegation is being
broken. There could even be more than one, after all.
I think that extra refcount_inc is likely to cause a leak.
--
Jeff Layton <[email protected]>
On 5/22/23 6:49 AM, Jeff Layton wrote:
> On Sun, 2023-05-21 at 20:56 -0700, [email protected] wrote:
>> On 5/21/23 7:56 PM, [email protected] wrote:
>>> On 5/21/23 4:08 PM, Jeff Layton wrote:
>>>> On Sat, 2023-05-20 at 14:36 -0700, Dai Ngo wrote:
>>>>> If the GETATTR request on a file that has write delegation in effect
>>>>> and the request attributes include the change info and size attribute
>>>>> then the write delegation is recalled and NFS4ERR_DELAY is returned
>>>>> for the GETATTR.
>>>>>
>>>>> Signed-off-by: Dai Ngo <[email protected]>
>>>>> ---
>>>>> fs/nfsd/nfs4xdr.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
>>>>> 1 file changed, 45 insertions(+)
>>>>>
>>>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>>>> index 76db2fe29624..e069b970f136 100644
>>>>> --- a/fs/nfsd/nfs4xdr.c
>>>>> +++ b/fs/nfsd/nfs4xdr.c
>>>>> @@ -2920,6 +2920,46 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr,
>>>>> u32 bmval0, u32 bmval1, u32 bmval2)
>>>>> return nfserr_resource;
>>>>> }
>>>>> +static struct file_lock *
>>>>> +nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
>>>>> +{
>>>>> + struct file_lock_context *ctx;
>>>>> + struct file_lock *fl;
>>>>> +
>>>>> + ctx = locks_inode_context(inode);
>>>>> + if (!ctx)
>>>>> + return NULL;
>>>>> + spin_lock(&ctx->flc_lock);
>>>>> + if (!list_empty(&ctx->flc_lease)) {
>>>>> + fl = list_first_entry(&ctx->flc_lease,
>>>>> + struct file_lock, fl_list);
>>>>> + if (fl->fl_type == F_WRLCK) {
>>>>> + spin_unlock(&ctx->flc_lock);
>>>>> + return fl;
>>>>> + }
> One more issue here too. FL_LAYOUT file_locks live on this list too.
> They shouldn't conflict with leases or delegations, so you probably just
> want to skip them.
oh ok, that means we have to scan the list and skip the FL_LAYOUT file_locks.
>
> Longer term, I wonder if we ought to add a new list in the
> file_lock_context for layouts? There's no reason to keep them all on the
> same list.
Yes, that would be nice.
Thanks Jeff,
-Dai
>
>>>>> + }
>>>>> + spin_unlock(&ctx->flc_lock);
>>>>> + return NULL;
>>>>> +}
>>>>> +
>>>>> +static __be32
>>>>> +nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode
>>>>> *inode)
>>>>> +{
>>>>> + __be32 status;
>>>>> + struct file_lock *fl;
>>>>> + struct nfs4_delegation *dp;
>>>>> +
>>>>> + fl = nfs4_wrdeleg_filelock(rqstp, inode);
>>>>> + if (!fl)
>>>>> + return 0;
>>>>> + dp = fl->fl_owner;
>>>>> + if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
>>>>> + return 0;
>>>>> + refcount_inc(&dp->dl_stid.sc_count);
>>>> Another question: Why are you taking a reference here at all?
>>> This is same as in nfsd_break_one_deleg and revoke_delegation.
>>> I think it is to prevent the delegation to be freed while delegation
>>> is being recalled.
>>>
>>>> AFAICT,
>>>> you don't even look at the delegation again after that point, so it's
>>>> not clear to me who's responsible for putting that reference.
>>> In v2, the sc_count is decrement by nfs4_put_stid. I forgot to do that
>>> in V4. I'll add it back in v5.
>> Actually the refcount is decremented after the CB_RECALL is done
>> by nfs4_put_stid in nfsd4_cb_recall_release. So we don't have to
>> decrement it here. This is to prevent the delegation to be free
>> while the recall is being sent.
>>
> That's the put for the increment in nfsd_break_one_deleg.
>
> You don't need to take an extra reference to a delegation to call
> nfsd_open_break_lease. You might not even know which delegation is being
> broken. There could even be more than one, after all.
>
> I think that extra refcount_inc is likely to cause a leak.