2022-04-22 08:35:42

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH RFC 0/8] Make NFSv4 OPEN(CREATE) less brittle

Attempt to address occasional reports of test failures caused by
NFSv4 OPEN(CREATE) having failing internally after the target
file object has been created.

The basic approach is to re-organize the NFSv4 OPEN code path so
that common failure modes occur /before/ the call to vfs_create()
rather than afterwards. In addition, the file is opened and created
atomically so that another client can't race and de-permit the
file just after it was created but before the server has opened it.

So far I haven't found any regressions. However I have not been
able to reproduce the original failures.


---

Chuck Lever (8):
NFSD: Clean up nfsd3_proc_create()
NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()
NFSD: Refactor nfsd_create_setattr()
NFSD: Refactor NFSv3 CREATE
NFSD: Refactor NFSv4 OPEN(CREATE)
NFSD: Remove do_nfsd_create()
NFSD: Clean up nfsd_open_verified()
NFSD: Instantiate a struct file when creating a regular NFSv4 file


fs/nfsd/filecache.c | 51 +++++++--
fs/nfsd/filecache.h | 2 +
fs/nfsd/nfs3proc.c | 141 +++++++++++++++++++++----
fs/nfsd/nfs4proc.c | 197 +++++++++++++++++++++++++++++++++--
fs/nfsd/nfs4state.c | 16 ++-
fs/nfsd/vfs.c | 245 ++++++++++----------------------------------
fs/nfsd/vfs.h | 14 +--
fs/nfsd/xdr4.h | 1 +
fs/open.c | 44 ++++++++
include/linux/fs.h | 2 +
10 files changed, 471 insertions(+), 242 deletions(-)

--
Chuck Lever


2022-04-22 14:59:13

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH RFC 6/8] NFSD: Remove do_nfsd_create()

Now that its two callers have their own version-specific instance of
this function, do_nfsd_create() is no longer used.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/vfs.c | 150 ---------------------------------------------------------
fs/nfsd/vfs.h | 10 ----
2 files changed, 160 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ed58f7d46730..407976830a2b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1393,156 +1393,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
rdev, resfhp);
}

-/*
- * NFSv3 and NFSv4 version of nfsd_create
- */
-__be32
-do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
- char *fname, int flen, struct iattr *iap,
- struct svc_fh *resfhp, int createmode, u32 *verifier,
- bool *truncp, bool *created)
-{
- struct dentry *dentry, *dchild = NULL;
- struct inode *dirp;
- __be32 err;
- int host_err;
- __u32 v_mtime=0, v_atime=0;
-
- err = nfserr_perm;
- if (!flen)
- goto out;
- err = nfserr_exist;
- if (isdotent(fname, flen))
- goto out;
- if (!(iap->ia_valid & ATTR_MODE))
- iap->ia_mode = 0;
- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- dirp = d_inode(dentry);
-
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
-
- fh_lock_nested(fhp, I_MUTEX_PARENT);
-
- /*
- * Compose the response file handle.
- */
- dchild = lookup_one_len(fname, dentry, flen);
- host_err = PTR_ERR(dchild);
- if (IS_ERR(dchild))
- goto out_nfserr;
-
- /* If file doesn't exist, check for permissions to create one */
- if (d_really_is_negative(dchild)) {
- err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
- if (err)
- goto out;
- }
-
- err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
- if (err)
- goto out;
-
- if (nfsd_create_is_exclusive(createmode)) {
- /* solaris7 gets confused (bugid 4218508) if these have
- * the high bit set, as do xfs filesystems without the
- * "bigtime" feature. So just clear the high bits. If this is
- * ever changed to use different attrs for storing the
- * verifier, then do_open_lookup() will also need to be fixed
- * accordingly.
- */
- v_mtime = verifier[0]&0x7fffffff;
- v_atime = verifier[1]&0x7fffffff;
- }
-
- if (d_really_is_positive(dchild)) {
- err = 0;
-
- switch (createmode) {
- case NFS3_CREATE_UNCHECKED:
- if (! d_is_reg(dchild))
- goto out;
- else if (truncp) {
- /* in nfsv4, we need to treat this case a little
- * differently. we don't want to truncate the
- * file now; this would be wrong if the OPEN
- * fails for some other reason. furthermore,
- * if the size is nonzero, we should ignore it
- * according to spec!
- */
- *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
- }
- else {
- iap->ia_valid &= ATTR_SIZE;
- goto set_attr;
- }
- break;
- case NFS3_CREATE_EXCLUSIVE:
- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
- && d_inode(dchild)->i_atime.tv_sec == v_atime
- && d_inode(dchild)->i_size == 0 ) {
- if (created)
- *created = true;
- break;
- }
- fallthrough;
- case NFS4_CREATE_EXCLUSIVE4_1:
- if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime
- && d_inode(dchild)->i_atime.tv_sec == v_atime
- && d_inode(dchild)->i_size == 0 ) {
- if (created)
- *created = true;
- goto set_attr;
- }
- fallthrough;
- case NFS3_CREATE_GUARDED:
- err = nfserr_exist;
- }
- goto out;
- }
-
- if (!IS_POSIXACL(dirp))
- iap->ia_mode &= ~current_umask();
-
- host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
- if (host_err < 0)
- goto out_nfserr;
- if (created)
- *created = true;
-
- nfsd_check_ignore_resizing(iap);
-
- if (nfsd_create_is_exclusive(createmode)) {
- /* Cram the verifier into atime/mtime */
- iap->ia_valid = ATTR_MTIME|ATTR_ATIME
- | ATTR_MTIME_SET|ATTR_ATIME_SET;
- /* XXX someone who knows this better please fix it for nsec */
- iap->ia_mtime.tv_sec = v_mtime;
- iap->ia_atime.tv_sec = v_atime;
- iap->ia_mtime.tv_nsec = 0;
- iap->ia_atime.tv_nsec = 0;
- }
-
- set_attr:
- err = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
-
- out:
- fh_unlock(fhp);
- if (dchild && !IS_ERR(dchild))
- dput(dchild);
- fh_drop_write(fhp);
- return err;
-
- out_nfserr:
- err = nfserrno(host_err);
- goto out;
-}
-
/*
* Read a symlink. On entry, *lenp must contain the maximum path length that
* fits into the buffer. On return, it contains the true length.
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 1f32a83456b0..f99794b033a5 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -71,10 +71,6 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct svc_fh *resfhp, struct iattr *iap);
-__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
- char *name, int len, struct iattr *attrs,
- struct svc_fh *res, int createmode,
- u32 *verifier, bool *truncp, bool *created);
__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
u64 offset, u32 count, __be32 *verf);
#ifdef CONFIG_NFSD_V4
@@ -161,10 +157,4 @@ static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat)
AT_STATX_SYNC_AS_STAT));
}

-static inline int nfsd_create_is_exclusive(int createmode)
-{
- return createmode == NFS3_CREATE_EXCLUSIVE
- || createmode == NFS4_CREATE_EXCLUSIVE4_1;
-}
-
#endif /* LINUX_NFSD_VFS_H */


2022-04-22 17:43:47

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH RFC 7/8] NFSD: Clean up nfsd_open_verified()

Its only caller always passes S_IFREG as the @type parameter. As an
additional clean-up, add a kerneldoc comment.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/filecache.c | 4 ++--
fs/nfsd/vfs.c | 15 ++++++++++++---
fs/nfsd/vfs.h | 2 +-
3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 2c1b027774d4..781bef7e42d9 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -995,8 +995,8 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,

nf->nf_mark = nfsd_file_mark_find_or_create(nf);
if (nf->nf_mark)
- status = nfsd_open_verified(rqstp, fhp, S_IFREG,
- may_flags, &nf->nf_file);
+ status = nfsd_open_verified(rqstp, fhp, may_flags,
+ &nf->nf_file);
else
status = nfserr_jukebox;
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 407976830a2b..893b5f21dab9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -827,14 +827,23 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
return err;
}

+/**
+ * nfsd_open_verified - Open a regular file for the filecache
+ * @rqstp: RPC request
+ * @fhp: NFS filehandle of the file to open
+ * @may_flags: internal permission flags
+ * @filp: OUT: open "struct file *"
+ *
+ * Returns an nfsstat value in network byte order.
+ */
__be32
-nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
- int may_flags, struct file **filp)
+nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
+ struct file **filp)
{
__be32 err;

validate_process_creds();
- err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
+ err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
validate_process_creds();
return err;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index f99794b033a5..26347d76f44a 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -86,7 +86,7 @@ __be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int nfsd_open_break_lease(struct inode *, int);
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
-__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t,
+__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *,
int, struct file **);
__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,


2022-04-22 18:02:41

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH RFC 2/8] NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()

Clean up: The "out" label already invokes fh_drop_write().

Note that fh_drop_write() is already careful not to invoke
mnt_drop_write() if either it has already been done or there is
nothing to drop. Therefore no change in behavior is expected.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/vfs.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c22ad0532e8e..97fee9b33490 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1485,7 +1485,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
}
- fh_drop_write(fhp);
goto out;
}

@@ -1493,10 +1492,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode &= ~current_umask();

host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
- if (host_err < 0) {
- fh_drop_write(fhp);
+ if (host_err < 0)
goto out_nfserr;
- }
if (created)
*created = true;



2022-04-22 22:53:29

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH RFC 5/8] NFSD: Refactor NFSv4 OPEN(CREATE)

Copy do_nfsd_create() to nfs4proc.c and remove NFSv3-specific logic.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4proc.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 152 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b207c76a873f..99c0485a29e9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -37,6 +37,8 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/namei.h>
+
#include <linux/sunrpc/addr.h>
#include <linux/nfs_ssc.h>

@@ -235,6 +237,154 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate
&resfh->fh_handle);
}

+static inline bool nfsd4_create_is_exclusive(int createmode)
+{
+ return createmode == NFS4_CREATE_EXCLUSIVE ||
+ createmode == NFS4_CREATE_EXCLUSIVE4_1;
+}
+
+/*
+ * Implement NFSv4's unchecked, guarded, and exclusive create
+ * semantics for regular files. Open state for this new file is
+ * subsequently fabricated in nfsd4_process_open2().
+ *
+ * Upon return, caller must release @fhp and @resfhp.
+ */
+static __be32
+nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct svc_fh *resfhp, struct nfsd4_open *open)
+{
+ struct iattr *iap = &open->op_iattr;
+ struct dentry *parent, *child;
+ __u32 v_mtime, v_atime;
+ struct inode *inode;
+ __be32 status;
+ int host_err;
+
+ if (isdotent(open->op_fname, open->op_fnamelen))
+ return nfserr_exist;
+ if (!(iap->ia_valid & ATTR_MODE))
+ iap->ia_mode = 0;
+
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+ if (status != nfs_ok)
+ return status;
+ parent = fhp->fh_dentry;
+ inode = d_inode(parent);
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ return nfserrno(host_err);
+
+ fh_lock_nested(fhp, I_MUTEX_PARENT);
+
+ child = lookup_one_len(open->op_fname, parent, open->op_fnamelen);
+ if (IS_ERR(child)) {
+ status = nfserrno(PTR_ERR(child));
+ goto out;
+ }
+
+ if (d_really_is_negative(child)) {
+ status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+ if (status != nfs_ok)
+ goto out;
+ }
+
+ status = fh_compose(resfhp, fhp->fh_export, child, fhp);
+ if (status != nfs_ok)
+ goto out;
+
+ v_mtime = 0;
+ v_atime = 0;
+ if (nfsd4_create_is_exclusive(open->op_createmode)) {
+ u32 *verifier = (u32 *)open->op_verf.data;
+
+ /*
+ * Solaris 7 gets confused (bugid 4218508) if these have
+ * the high bit set, as do xfs filesystems without the
+ * "bigtime" feature. So just clear the high bits. If this
+ * is ever changed to use different attrs for storing the
+ * verifier, then do_open_lookup() will also need to be
+ * fixed accordingly.
+ */
+ v_mtime = verifier[0] & 0x7fffffff;
+ v_atime = verifier[1] & 0x7fffffff;
+ }
+
+ if (d_really_is_positive(child)) {
+ status = nfs_ok;
+
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ if (!d_is_reg(child))
+ break;
+
+ /*
+ * In NFSv4, we don't want to truncate the file
+ * now. This would be wrong if the OPEN fails for
+ * some other reason. Furthermore, if the size is
+ * nonzero, we should ignore it according to spec!
+ */
+ open->op_truncate = (iap->ia_valid & ATTR_SIZE) &&
+ !iap->ia_size;
+ break;
+ case NFS4_CREATE_GUARDED:
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+ d_inode(child)->i_atime.tv_sec == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ break; /* subtle */
+ }
+ status = nfserr_exist;
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+ if (d_inode(child)->i_mtime.tv_sec == v_mtime &&
+ d_inode(child)->i_atime.tv_sec == v_atime &&
+ d_inode(child)->i_size == 0) {
+ open->op_created = true;
+ goto set_attr; /* subtle */
+ }
+ status = nfserr_exist;
+ }
+ goto out;
+ }
+
+ if (!IS_POSIXACL(inode))
+ iap->ia_mode &= ~current_umask();
+
+ host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true);
+ if (host_err < 0) {
+ status = nfserrno(host_err);
+ goto out;
+ }
+ open->op_created = true;
+
+ /* A newly created file already has a file size of zero. */
+ if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
+ iap->ia_valid &= ~ATTR_SIZE;
+ if (nfsd4_create_is_exclusive(open->op_createmode)) {
+ iap->ia_valid = ATTR_MTIME | ATTR_ATIME |
+ ATTR_MTIME_SET|ATTR_ATIME_SET;
+ iap->ia_mtime.tv_sec = v_mtime;
+ iap->ia_atime.tv_sec = v_atime;
+ iap->ia_mtime.tv_nsec = 0;
+ iap->ia_atime.tv_nsec = 0;
+ }
+
+set_attr:
+ status = nfsd_create_setattr(rqstp, fhp, resfhp, iap);
+
+out:
+ fh_unlock(fhp);
+ if (child && !IS_ERR(child))
+ dput(child);
+ fh_drop_write(fhp);
+ return status;
+}
+
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
{
@@ -264,16 +414,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* yes | yes | GUARDED4 | GUARDED4
*/

- /*
- * Note: create modes (UNCHECKED,GUARDED...) are the same
- * in NFSv4 as in v3 except EXCLUSIVE4_1.
- */
current->fs->umask = open->op_umask;
- status = do_nfsd_create(rqstp, current_fh, open->op_fname,
- open->op_fnamelen, &open->op_iattr,
- *resfh, open->op_createmode,
- (u32 *)open->op_verf.data,
- &open->op_truncate, &open->op_created);
+ status = nfsd4_create_file(rqstp, current_fh, *resfh, open);
current->fs->umask = 0;

if (!status && open->op_label.len)
@@ -284,7 +426,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* use the returned bitmask to indicate which attributes
* we used to store the verifier:
*/
- if (nfsd_create_is_exclusive(open->op_createmode) && status == 0)
+ if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0)
open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS |
FATTR4_WORD1_TIME_MODIFY);
} else