2021-12-17 21:57:17

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 3/9] nfsd: Retry once in nfsd_open on an -EOPENSTALE return

From: Jeff Layton <[email protected]>

If we get back -EOPENSTALE from an NFSv4 open, then we either got some
unhandled error or the inode we got back was not the same as the one
associated with the dentry.

We really have no recourse in that situation other than to retry the
open, and if it fails to just return nfserr_stale back to the client.

Signed-off-by: Jeff Layton <[email protected]>
Signed-off-by: Lance Shelton <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfsd/nfsproc.c | 1 +
fs/nfsd/vfs.c | 10 +++++++++-
2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 1ebf02123368..458307876ead 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -881,6 +881,7 @@ nfserrno (int errno)
{ nfserr_serverfault, -ESERVERFAULT },
{ nfserr_serverfault, -ENFILE },
{ nfserr_io, -EREMOTEIO },
+ { nfserr_stale, -EOPENSTALE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
{ nfserr_no_grace, -ENOGRACE},
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 738d564ca4ce..a90cc08211a3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -779,6 +779,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
int may_flags, struct file **filp)
{
__be32 err;
+ bool retried = false;

validate_process_creds();
/*
@@ -794,9 +795,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
*/
if (type == S_IFREG)
may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+retry:
err = fh_verify(rqstp, fhp, type, may_flags);
- if (!err)
+ if (!err) {
err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ if (err == nfserr_stale && !retried) {
+ retried = true;
+ fh_put(fhp);
+ goto retry;
+ }
+ }
validate_process_creds();
return err;
}
--
2.33.1



2021-12-17 21:57:18

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 4/9] nfs: Add export support for weak cache consistency attributes

From: Trond Myklebust <[email protected]>

Allow knfsd to request weak cache consistency attributes on files that
have delegations and/or have up to date attribute caches.

Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: Lance Shelton <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/export.c | 24 ++++++++++++
fs/nfsd/nfs3xdr.c | 83 +++++++++++++++++++++++++++-------------
fs/nfsd/nfs4xdr.c | 6 +--
fs/nfsd/vfs.c | 14 +++++++
fs/nfsd/vfs.h | 5 ++-
include/linux/exportfs.h | 3 ++
6 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 171c424cb6d5..967f8902c49b 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -151,10 +151,34 @@ static u64 nfs_fetch_iversion(struct inode *inode)
return inode_peek_iversion_raw(inode);
}

+static int nfs_exp_getattr(struct path *path, struct kstat *stat, bool force)
+{
+ const unsigned long check_valid =
+ NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_ATIME |
+ NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME |
+ NFS_INO_INVALID_SIZE | /* NFS_INO_INVALID_BLOCKS | */
+ NFS_INO_INVALID_OTHER | NFS_INO_INVALID_NLINK;
+ struct inode *inode = d_inode(path->dentry);
+ int flags = force ? AT_STATX_SYNC_AS_STAT : AT_STATX_DONT_SYNC;
+ int ret, ret2 = 0;
+
+ if (!force && nfs_check_cache_invalid(inode, check_valid))
+ ret2 = -EAGAIN;
+ ret = vfs_getattr(path, stat, STATX_BASIC_STATS & ~STATX_BLOCKS, flags);
+ if (ret < 0)
+ return ret;
+ stat->blocks = nfs_calc_block_size(stat->size);
+ if (S_ISDIR(inode->i_mode))
+ stat->blksize = NFS_SERVER(inode)->dtsize;
+ stat->result_mask |= STATX_BLOCKS;
+ return ret2;
+}
+
const struct export_operations nfs_export_ops = {
.encode_fh = nfs_encode_fh,
.fh_to_dentry = nfs_fh_to_dentry,
.get_parent = nfs_get_parent,
+ .getattr = nfs_exp_getattr,
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 0a5ebc52e6a9..81b6cf228517 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -415,21 +415,14 @@ svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp)
return svcxdr_encode_wcc_attr(xdr, fhp);
}

-/**
- * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes
- * @rqstp: Context of a completed RPC transaction
- * @xdr: XDR stream
- * @fhp: File handle to encode
- *
- * Return values:
- * %false: Send buffer space was exhausted
- * %true: Success
- */
-bool
-svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
- const struct svc_fh *fhp)
+static bool
+__svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp, bool force)
{
struct dentry *dentry = fhp->fh_dentry;
+ struct path path = {
+ .dentry = dentry,
+ };
struct kstat stat;

/*
@@ -437,9 +430,10 @@ svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
* stale file handle. In this case, no attributes are
* returned.
*/
- if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry))
+ if (!dentry || !d_really_is_positive(dentry))
goto no_post_op_attrs;
- if (fh_getattr(fhp, &stat) != nfs_ok)
+ path.mnt = fhp->fh_export->ex_path.mnt;
+ if (nfsd_getattr(&path, &stat, force) != nfs_ok)
goto no_post_op_attrs;

if (xdr_stream_encode_item_present(xdr) < 0)
@@ -454,6 +448,31 @@ svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
return xdr_stream_encode_item_absent(xdr) > 0;
}

+/**
+ * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes
+ * @rqstp: Context of a completed RPC transaction
+ * @xdr: XDR stream
+ * @fhp: File handle to encode
+ *
+ * Return values:
+ * %false: Send buffer space was exhausted
+ * %true: Success
+ */
+bool
+svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
+ const struct svc_fh *fhp)
+{
+ return __svcxdr_encode_post_op_attr(rqstp, xdr, fhp, true);
+}
+
+static bool
+svcxdr_encode_post_op_attr_opportunistic(struct svc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ const struct svc_fh *fhp)
+{
+ return __svcxdr_encode_post_op_attr(rqstp, xdr, fhp, !fhp->fh_no_wcc);
+}
+
/*
* Encode weak cache consistency data
*/
@@ -481,7 +500,7 @@ svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr,
neither:
if (xdr_stream_encode_item_absent(xdr) < 0)
return false;
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr, fhp))
return false;

return true;
@@ -879,11 +898,13 @@ int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p)
return 0;
if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
return 0;
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->dirfh))
return 0;
break;
default:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->dirfh))
return 0;
}

@@ -901,13 +922,15 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
return 0;
switch (resp->status) {
case nfs_ok:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
if (xdr_stream_encode_u32(xdr, resp->access) < 0)
return 0;
break;
default:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
}

@@ -926,7 +949,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
return 0;
switch (resp->status) {
case nfs_ok:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
if (xdr_stream_encode_u32(xdr, resp->len) < 0)
return 0;
@@ -935,7 +959,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
return 0;
break;
default:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
}

@@ -954,7 +979,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
return 0;
switch (resp->status) {
case nfs_ok:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
if (xdr_stream_encode_u32(xdr, resp->count) < 0)
return 0;
@@ -968,7 +994,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
return 0;
break;
default:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
}

@@ -1065,7 +1092,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
return 0;
switch (resp->status) {
case nfs_ok:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
return 0;
@@ -1077,7 +1105,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
return 0;
break;
default:
- if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
+ if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
+ &resp->fh))
return 0;
}

@@ -1221,7 +1250,7 @@ svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name,
if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok)
goto out_noattrs;

- if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp))
+ if (!svcxdr_encode_post_op_attr_opportunistic(resp->rqstp, xdr, fhp))
goto out;
if (!svcxdr_encode_post_op_fh3(xdr, fhp))
goto out;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 7abeccb975b2..e14af0383b70 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2865,9 +2865,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
goto out;
}

- err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
- if (err)
- goto out_nfserr;
+ status = nfsd_getattr(&path, &stat, true);
+ if (status)
+ goto out;
if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
(bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a90cc08211a3..4d57befdac6e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2412,3 +2412,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,

return err? nfserrno(err) : 0;
}
+
+
+__be32
+nfsd_getattr(struct path *p, struct kstat *stat, bool force)
+{
+ const struct export_operations *ops = p->dentry->d_sb->s_export_op;
+ int err;
+
+ if (ops->getattr)
+ err = ops->getattr(p, stat, force);
+ else
+ err = vfs_getattr(p, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
+ return err ? nfserrno(err) : 0;
+}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index b21b76e6b9a8..6edae1b9a96e 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -132,6 +132,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);

+__be32 nfsd_getattr(struct path *p, struct kstat *, bool);
+
static inline int fh_want_write(struct svc_fh *fh)
{
int ret;
@@ -156,8 +158,7 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
{
struct path p = {.mnt = fh->fh_export->ex_path.mnt,
.dentry = fh->fh_dentry};
- return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
- AT_STATX_SYNC_AS_STAT));
+ return nfsd_getattr(&p, stat, true);
}

static inline int nfsd_create_is_exclusive(int createmode)
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 3260fe714846..58f36022787e 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -10,6 +10,8 @@ struct inode;
struct iomap;
struct super_block;
struct vfsmount;
+struct path;
+struct kstat;

/* limit the handle size to NFSv4 handle size now */
#define MAX_HANDLE_SZ 128
@@ -224,6 +226,7 @@ struct export_operations {
#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do
asychronous blocking locks */
unsigned long flags;
+ int (*getattr)(struct path *, struct kstat *, bool);
};

extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
--
2.33.1


2021-12-17 21:57:17

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 5/9] nfsd: NFSv3 should allow zero length writes

From: Trond Myklebust <[email protected]>

According to RFC1813: "If count is 0, the WRITE will succeed
and return a count of 0, barring errors due to permissions checking."

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfsd/vfs.c | 3 +++
net/sunrpc/svc.c | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4d57befdac6e..38fdfcbb079e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -969,6 +969,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,

trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);

+ if (!*cnt)
+ return nfs_ok;
+
if (sb->s_export_op)
exp_op_flags = sb->s_export_op->flags;

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a3bbe5ce4570..d1ccf37a4588 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1692,7 +1692,7 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
* entirely in rq_arg.pages. In this case, @first is empty.
*/
i = 0;
- if (first->iov_len) {
+ if (first->iov_len || !total) {
vec[i].iov_base = first->iov_base;
vec[i].iov_len = min_t(size_t, total, first->iov_len);
total -= vec[i].iov_len;
--
2.33.1


2021-12-17 22:23:42

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 5/9] nfsd: NFSv3 should allow zero length writes

On Fri, Dec 17, 2021 at 5:07 PM <[email protected]> wrote:
>
> From: Trond Myklebust <[email protected]>
>
> According to RFC1813: "If count is 0, the WRITE will succeed
> and return a count of 0, barring errors due to permissions checking."

Yes, I'm surprised we're not already doing this right.

I wonder how far back this bug goes.

The svc.c code is from 8154ef2776aa "NFSD: Clean up legacy NFS WRITE
argument XDR decoders", but the behavior might predate that code. The
nfsd_vfs_write() logic, I'm not sure I understand.

We have a pynfs test for the v4 case (WRT4), but I guess we must have
nothing testing the v3 case.

--b.

>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfsd/vfs.c | 3 +++
> net/sunrpc/svc.c | 2 +-
> 2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 4d57befdac6e..38fdfcbb079e 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -969,6 +969,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
>
> trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
>
> + if (!*cnt)
> + return nfs_ok;
> +
> if (sb->s_export_op)
> exp_op_flags = sb->s_export_op->flags;
>
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index a3bbe5ce4570..d1ccf37a4588 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1692,7 +1692,7 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
> * entirely in rq_arg.pages. In this case, @first is empty.
> */
> i = 0;
> - if (first->iov_len) {
> + if (first->iov_len || !total) {
> vec[i].iov_base = first->iov_base;
> vec[i].iov_len = min_t(size_t, total, first->iov_len);
> total -= vec[i].iov_len;
> --
> 2.33.1
>


2021-12-18 18:42:07

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 5/9] nfsd: NFSv3 should allow zero length writes



> On Dec 17, 2021, at 5:23 PM, Bruce Fields <[email protected]> wrote:
>
> On Fri, Dec 17, 2021 at 5:07 PM <[email protected]> wrote:
>>
>> From: Trond Myklebust <[email protected]>
>>
>> According to RFC1813: "If count is 0, the WRITE will succeed
>> and return a count of 0, barring errors due to permissions checking."
>
> Yes, I'm surprised we're not already doing this right.
>
> I wonder how far back this bug goes.
>
> The svc.c code is from 8154ef2776aa "NFSD: Clean up legacy NFS WRITE
> argument XDR decoders", but the behavior might predate that code. The
> nfsd_vfs_write() logic, I'm not sure I understand.

The new check in nfsd_vfs_write() might circumvent the VFS
layer's security checks, so I agree, that is a little suspect.


> We have a pynfs test for the v4 case (WRT4), but I guess we must have
> nothing testing the v3 case.

I guess cthon04 doesn't check this case.

But it seems to me WRT4 should already tickle any problems
with nfsd_vfs_write(), shouldn't it?


> --b.
>
>>
>> Signed-off-by: Trond Myklebust <[email protected]>
>> ---
>> fs/nfsd/vfs.c | 3 +++
>> net/sunrpc/svc.c | 2 +-
>> 2 files changed, 4 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
>> index 4d57befdac6e..38fdfcbb079e 100644
>> --- a/fs/nfsd/vfs.c
>> +++ b/fs/nfsd/vfs.c
>> @@ -969,6 +969,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
>>
>> trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
>>
>> + if (!*cnt)
>> + return nfs_ok;
>> +
>> if (sb->s_export_op)
>> exp_op_flags = sb->s_export_op->flags;
>>
>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>> index a3bbe5ce4570..d1ccf37a4588 100644
>> --- a/net/sunrpc/svc.c
>> +++ b/net/sunrpc/svc.c
>> @@ -1692,7 +1692,7 @@ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
>> * entirely in rq_arg.pages. In this case, @first is empty.
>> */
>> i = 0;
>> - if (first->iov_len) {
>> + if (first->iov_len || !total) {
>> vec[i].iov_base = first->iov_base;
>> vec[i].iov_len = min_t(size_t, total, first->iov_len);
>> total -= vec[i].iov_len;
>> --
>> 2.33.1
>>
>

--
Chuck Lever




2021-12-18 21:16:56

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH 4/9] nfs: Add export support for weak cache consistency attributes


> On Dec 17, 2021, at 4:50 PM, [email protected] wrote:
>
> From: Trond Myklebust <[email protected]>
>
> Allow knfsd to request weak cache consistency attributes on files that
> have delegations and/or have up to date attribute caches.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> Signed-off-by: Lance Shelton <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>

This one does not apply to v5.16-rc. Please rebase this series
and resend.

More below:


> ---
> fs/nfs/export.c | 24 ++++++++++++
> fs/nfsd/nfs3xdr.c | 83 +++++++++++++++++++++++++++-------------
> fs/nfsd/nfs4xdr.c | 6 +--
> fs/nfsd/vfs.c | 14 +++++++
> fs/nfsd/vfs.h | 5 ++-
> include/linux/exportfs.h | 3 ++
> 6 files changed, 103 insertions(+), 32 deletions(-)
>
> diff --git a/fs/nfs/export.c b/fs/nfs/export.c
> index 171c424cb6d5..967f8902c49b 100644
> --- a/fs/nfs/export.c
> +++ b/fs/nfs/export.c
> @@ -151,10 +151,34 @@ static u64 nfs_fetch_iversion(struct inode *inode)
> return inode_peek_iversion_raw(inode);
> }
>
> +static int nfs_exp_getattr(struct path *path, struct kstat *stat, bool force)
> +{
> + const unsigned long check_valid =
> + NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_ATIME |
> + NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME |
> + NFS_INO_INVALID_SIZE | /* NFS_INO_INVALID_BLOCKS | */
> + NFS_INO_INVALID_OTHER | NFS_INO_INVALID_NLINK;
> + struct inode *inode = d_inode(path->dentry);
> + int flags = force ? AT_STATX_SYNC_AS_STAT : AT_STATX_DONT_SYNC;
> + int ret, ret2 = 0;
> +
> + if (!force && nfs_check_cache_invalid(inode, check_valid))
> + ret2 = -EAGAIN;
> + ret = vfs_getattr(path, stat, STATX_BASIC_STATS & ~STATX_BLOCKS, flags);
> + if (ret < 0)
> + return ret;
> + stat->blocks = nfs_calc_block_size(stat->size);
> + if (S_ISDIR(inode->i_mode))
> + stat->blksize = NFS_SERVER(inode)->dtsize;
> + stat->result_mask |= STATX_BLOCKS;
> + return ret2;
> +}
> +
> const struct export_operations nfs_export_ops = {
> .encode_fh = nfs_encode_fh,
> .fh_to_dentry = nfs_fh_to_dentry,
> .get_parent = nfs_get_parent,
> + .getattr = nfs_exp_getattr,
> .fetch_iversion = nfs_fetch_iversion,
> .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
> EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
> index 0a5ebc52e6a9..81b6cf228517 100644
> --- a/fs/nfsd/nfs3xdr.c
> +++ b/fs/nfsd/nfs3xdr.c
> @@ -415,21 +415,14 @@ svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp)
> return svcxdr_encode_wcc_attr(xdr, fhp);
> }
>
> -/**
> - * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes
> - * @rqstp: Context of a completed RPC transaction
> - * @xdr: XDR stream
> - * @fhp: File handle to encode
> - *
> - * Return values:
> - * %false: Send buffer space was exhausted
> - * %true: Success
> - */
> -bool
> -svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> - const struct svc_fh *fhp)
> +static bool
> +__svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> + const struct svc_fh *fhp, bool force)
> {
> struct dentry *dentry = fhp->fh_dentry;
> + struct path path = {
> + .dentry = dentry,
> + };
> struct kstat stat;
>
> /*
> @@ -437,9 +430,10 @@ svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> * stale file handle. In this case, no attributes are
> * returned.
> */
> - if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry))
> + if (!dentry || !d_really_is_positive(dentry))
> goto no_post_op_attrs;
> - if (fh_getattr(fhp, &stat) != nfs_ok)
> + path.mnt = fhp->fh_export->ex_path.mnt;
> + if (nfsd_getattr(&path, &stat, force) != nfs_ok)
> goto no_post_op_attrs;
>
> if (xdr_stream_encode_item_present(xdr) < 0)
> @@ -454,6 +448,31 @@ svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> return xdr_stream_encode_item_absent(xdr) > 0;
> }
>
> +/**
> + * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes
> + * @rqstp: Context of a completed RPC transaction
> + * @xdr: XDR stream
> + * @fhp: File handle to encode
> + *
> + * Return values:
> + * %false: Send buffer space was exhausted
> + * %true: Success
> + */
> +bool
> +svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> + const struct svc_fh *fhp)
> +{
> + return __svcxdr_encode_post_op_attr(rqstp, xdr, fhp, true);
> +}
> +
> +static bool
> +svcxdr_encode_post_op_attr_opportunistic(struct svc_rqst *rqstp,
> + struct xdr_stream *xdr,
> + const struct svc_fh *fhp)
> +{
> + return __svcxdr_encode_post_op_attr(rqstp, xdr, fhp, !fhp->fh_no_wcc);
> +}

I don't quite understand the need for splitting encode_post_op_attr.
At the least, more commentary in the patch description would help.
For now, I will study this and get back to you.


> +
> /*
> * Encode weak cache consistency data
> */
> @@ -481,7 +500,7 @@ svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr,
> neither:
> if (xdr_stream_encode_item_absent(xdr) < 0)
> return false;
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr, fhp))
> return false;
>
> return true;
> @@ -879,11 +898,13 @@ int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> return 0;
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->dirfh))
> return 0;
> break;
> default:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->dirfh))
> return 0;
> }
>
> @@ -901,13 +922,15 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> switch (resp->status) {
> case nfs_ok:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> if (xdr_stream_encode_u32(xdr, resp->access) < 0)
> return 0;
> break;
> default:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> }
>
> @@ -926,7 +949,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> switch (resp->status) {
> case nfs_ok:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> if (xdr_stream_encode_u32(xdr, resp->len) < 0)
> return 0;
> @@ -935,7 +959,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> break;
> default:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> }
>
> @@ -954,7 +979,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> switch (resp->status) {
> case nfs_ok:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> if (xdr_stream_encode_u32(xdr, resp->count) < 0)
> return 0;
> @@ -968,7 +994,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> break;
> default:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> }
>
> @@ -1065,7 +1092,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> switch (resp->status) {
> case nfs_ok:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
> return 0;
> @@ -1077,7 +1105,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
> return 0;
> break;
> default:
> - if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
> + if (!svcxdr_encode_post_op_attr_opportunistic(rqstp, xdr,
> + &resp->fh))
> return 0;
> }
>
> @@ -1221,7 +1250,7 @@ svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name,
> if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok)
> goto out_noattrs;
>
> - if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp))
> + if (!svcxdr_encode_post_op_attr_opportunistic(resp->rqstp, xdr, fhp))
> goto out;
> if (!svcxdr_encode_post_op_fh3(xdr, fhp))
> goto out;
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 7abeccb975b2..e14af0383b70 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2865,9 +2865,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
> goto out;
> }
>
> - err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
> - if (err)
> - goto out_nfserr;
> + status = nfsd_getattr(&path, &stat, true);
> + if (status)
> + goto out;
> if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
> FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
> (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index a90cc08211a3..4d57befdac6e 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -2412,3 +2412,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
>
> return err? nfserrno(err) : 0;
> }
> +
> +
> +__be32
> +nfsd_getattr(struct path *p, struct kstat *stat, bool force)
> +{
> + const struct export_operations *ops = p->dentry->d_sb->s_export_op;
> + int err;
> +
> + if (ops->getattr)
> + err = ops->getattr(p, stat, force);
> + else
> + err = vfs_getattr(p, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
> + return err ? nfserrno(err) : 0;
> +}

Please add nfsd_getattr() in a separate patch ("Refactor:").
And please include a kerneldoc comment for it.


> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index b21b76e6b9a8..6edae1b9a96e 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -132,6 +132,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
> __be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
> struct dentry *, int);
>
> +__be32 nfsd_getattr(struct path *p, struct kstat *, bool);
> +
> static inline int fh_want_write(struct svc_fh *fh)
> {
> int ret;
> @@ -156,8 +158,7 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
> {
> struct path p = {.mnt = fh->fh_export->ex_path.mnt,
> .dentry = fh->fh_dentry};
> - return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS,
> - AT_STATX_SYNC_AS_STAT));
> + return nfsd_getattr(&p, stat, true);
> }
>
> static inline int nfsd_create_is_exclusive(int createmode)
> diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
> index 3260fe714846..58f36022787e 100644
> --- a/include/linux/exportfs.h
> +++ b/include/linux/exportfs.h
> @@ -10,6 +10,8 @@ struct inode;
> struct iomap;
> struct super_block;
> struct vfsmount;
> +struct path;
> +struct kstat;
>
> /* limit the handle size to NFSv4 handle size now */
> #define MAX_HANDLE_SZ 128
> @@ -224,6 +226,7 @@ struct export_operations {
> #define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do
> asychronous blocking locks */
> unsigned long flags;
> + int (*getattr)(struct path *, struct kstat *, bool);
> };
>
> extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
> --
> 2.33.1
>

--
Chuck Lever




2021-12-19 22:25:34

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 5/9] nfsd: NFSv3 should allow zero length writes

On Sat, Dec 18, 2021 at 1:42 PM Chuck Lever III <[email protected]> wrote:
> But it seems to me WRT4 should already tickle any problems
> with nfsd_vfs_write(), shouldn't it?

I was just taking Trond's word that this is NFSv3-specific, but it's
not clear to me why from the code, and I know that WRT4 is passing.
Something's weird. I'm travelling or I'd test it.

--b.