In order to add directory delegation support, we need to break
delegations on the parent whenever there is going to be a change in the
directory.
Add a new delegated_inode parameter to vfs_mknod(). Most callers will
set it to NULL, but do_mknodat() can use it to wait synchronously
for the delegation break to complete.
Signed-off-by: Jeff Layton <[email protected]>
---
drivers/base/devtmpfs.c | 2 +-
fs/ecryptfs/inode.c | 2 +-
fs/init.c | 2 +-
fs/namei.c | 11 ++++++++---
fs/nfsd/vfs.c | 2 +-
fs/overlayfs/overlayfs.h | 2 +-
include/linux/fs.h | 4 ++--
net/unix/af_unix.c | 2 +-
8 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index c00126796f79..8c0a872e3165 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -217,7 +217,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
return PTR_ERR(dentry);
err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode,
- dev->devt);
+ dev->devt, NULL);
if (!err) {
struct iattr newattrs;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a99b1e264c46..c6442b8caa2f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -566,7 +566,7 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
rc = lock_parent(dentry, &lower_dentry, &lower_dir);
if (!rc)
rc = vfs_mknod(&nop_mnt_idmap, lower_dir,
- lower_dentry, mode, dev);
+ lower_dentry, mode, dev, NULL);
if (rc || d_really_is_negative(lower_dentry))
goto out;
rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
diff --git a/fs/init.c b/fs/init.c
index 325c9e4d9b20..99c6b413adad 100644
--- a/fs/init.c
+++ b/fs/init.c
@@ -157,7 +157,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
error = security_path_mknod(&path, dentry, mode, dev);
if (!error)
error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode,
- dentry, mode, new_decode_dev(dev));
+ dentry, mode, new_decode_dev(dev), NULL);
done_path_create(&path, dentry);
return error;
}
diff --git a/fs/namei.c b/fs/namei.c
index 01e04cf155eb..a185974c1a55 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3996,7 +3996,8 @@ EXPORT_SYMBOL(user_path_create);
* raw inode simply pass @nop_mnt_idmap.
*/
int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode, dev_t dev)
+ struct dentry *dentry, umode_t mode, dev_t dev,
+ struct inode **delegated_inode)
{
bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
int error = may_create(idmap, dir, dentry);
@@ -4020,6 +4021,10 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
if (error)
return error;
+ error = try_break_deleg(dir, delegated_inode);
+ if (error)
+ return error;
+
error = dir->i_op->mknod(idmap, dir, dentry, mode, dev);
if (!error)
fsnotify_create(dir, dentry);
@@ -4078,11 +4083,11 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode,
break;
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(idmap, path.dentry->d_inode,
- dentry, mode, new_decode_dev(dev));
+ dentry, mode, new_decode_dev(dev), &delegated_inode);
break;
case S_IFIFO: case S_IFSOCK:
error = vfs_mknod(idmap, path.dentry->d_inode,
- dentry, mode, 0);
+ dentry, mode, 0, &delegated_inode);
break;
}
out2:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 47b8ab1d4b17..fe088e7c49c8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1525,7 +1525,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
case S_IFIFO:
case S_IFSOCK:
host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild,
- iap->ia_mode, rdev);
+ iap->ia_mode, rdev, NULL);
break;
default:
printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index be2518e6da95..26cdef6c3579 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -251,7 +251,7 @@ static inline int ovl_do_mknod(struct ovl_fs *ofs,
struct inode *dir, struct dentry *dentry,
umode_t mode, dev_t dev)
{
- int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev);
+ int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev, NULL);
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
return err;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8fb4101fea49..4b396c9a7a84 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1841,7 +1841,7 @@ int vfs_create(struct mnt_idmap *, struct inode *,
int vfs_mkdir(struct mnt_idmap *, struct inode *,
struct dentry *, umode_t, struct inode **);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
- umode_t, dev_t);
+ umode_t, dev_t, struct inode **);
int vfs_symlink(struct mnt_idmap *, struct inode *,
struct dentry *, const char *);
int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *,
@@ -1879,7 +1879,7 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap,
struct inode *dir, struct dentry *dentry)
{
return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE,
- WHITEOUT_DEV);
+ WHITEOUT_DEV, NULL);
}
struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0748e7ea5210..34fbcc90c984 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1227,7 +1227,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
idmap = mnt_idmap(parent.mnt);
err = security_path_mknod(&parent, dentry, mode, 0);
if (!err)
- err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
+ err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0, NULL);
if (err)
goto out_path;
err = mutex_lock_interruptible(&u->bindlock);
--
2.44.0
> int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
> - umode_t, dev_t);
> + umode_t, dev_t, struct inode **);
So we will have at least the following helpers with an additional
delegated inode argument.
vfs_unlink()
vfs_link()
notify_change()
vfs_create()
vfs_mknod()
vfs_mkdir()
vfs_rmdir()
From looking at the callers, all these helpers will be called with a
non-NULL delegated inode argument in the vfs only. Unless it is generally
conceivable that other callers will want to pass a non-NULL inode argument
over time, it might make more sense to add vfs_<operation>_delegated() or
__vfs_<operation>() and make vfs_mknod() and friends exported wrappers
around it.
I mean it's a matter of preference ultimately but this seems cleaner to
me. So at least for the new ones we should consider it. Would also make
the patch smaller.
> int vfs_symlink(struct mnt_idmap *, struct inode *,
> struct dentry *, const char *);
> int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *,
> @@ -1879,7 +1879,7 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap,
> struct inode *dir, struct dentry *dentry)
> {
> return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE,
> - WHITEOUT_DEV);
> + WHITEOUT_DEV, NULL);
> }
>
> struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 0748e7ea5210..34fbcc90c984 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -1227,7 +1227,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
> idmap = mnt_idmap(parent.mnt);
> err = security_path_mknod(&parent, dentry, mode, 0);
> if (!err)
> - err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
> + err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0, NULL);
> if (err)
> goto out_path;
> err = mutex_lock_interruptible(&u->bindlock);
>
> --
> 2.44.0
>
On Wed, 2024-03-20 at 14:42 +0100, Christian Brauner wrote:
> > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
> > - umode_t, dev_t);
> > + umode_t, dev_t, struct inode **);
>
> So we will have at least the following helpers with an additional
> delegated inode argument.
>
> vfs_unlink()
> vfs_link()
> notify_change()
> vfs_create()
> vfs_mknod()
> vfs_mkdir()
> vfs_rmdir()
>
> From looking at callers all these helpers will be called with non-NULL
> delegated inode argument in vfs only. Unless it is generally conceivable
> that other callers will want to pass a non-NULL inode argument over time
> it might make more sense to add vfs_<operation>_delegated() or
> __vfs_<operation>() and make vfs_mknod() and friends exported wrappers
> around it.
>
> I mean it's a matter of preference ultimately but this seems cleaner to
> me. So at least for the new ones we should consider it. Would also make
> the patch smaller.
>
Good suggestion. I just respun along those lines and it's a lot cleaner.
I'm still testing it but here is the new diffstat. It's a little larger
actually, but it keeps the changes more confined to namei.c:
jlayton@tleilax:~/git/linux$ git diff master --stat
fs/locks.c | 12 +++-
fs/namei.c | 227 ++++++++++++++++++++++++++++++++++++++++++++++--------------------
fs/nfs/delegation.c | 5 ++
fs/nfs/dir.c | 20 ++++++
fs/nfs/internal.h | 2 +-
fs/nfs/nfs4file.c | 2 +
fs/nfs/nfs4proc.c | 62 +++++++++++++++++-
fs/nfs/nfs4trace.h | 104 ++++++++++++++++++++++++++++++
fs/nfs/nfs4xdr.c | 136 +++++++++++++++++++++++++++++++++++++++
fs/nfs/nfstrace.h | 8 ++-
fs/nfsd/filecache.c | 37 +++++++++--
fs/nfsd/filecache.h | 2 +
fs/nfsd/nfs4proc.c | 48 ++++++++++++++
fs/nfsd/nfs4state.c | 113 ++++++++++++++++++++++++++++++++-
fs/nfsd/nfs4xdr.c | 91 ++++++++++++++++++++++++++-
fs/nfsd/state.h | 5 ++
fs/nfsd/vfs.c | 5 +-
fs/nfsd/vfs.h | 2 +-
fs/nfsd/xdr4.h | 19 ++++++
fs/smb/client/cifsfs.c | 3 +
include/linux/filelock.h | 14 +++++
include/linux/nfs4.h | 7 +++
include/linux/nfs_fs.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 2 +
25 files changed, 838 insertions(+), 90 deletions(-)
--
Jeff Layton <[email protected]>
On Wed, Mar 20, 2024 at 04:12:29PM -0400, Jeff Layton wrote:
> On Wed, 2024-03-20 at 14:42 +0100, Christian Brauner wrote:
> > > int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
> > > - umode_t, dev_t);
> > > + umode_t, dev_t, struct inode **);
> >
> > So we will have at least the following helpers with an additional
> > delegated inode argument.
> >
> > vfs_unlink()
> > vfs_link()
> > notify_change()
> > vfs_create()
> > vfs_mknod()
> > vfs_mkdir()
> > vfs_rmdir()
> >
> > From looking at callers all these helpers will be called with non-NULL
> > delegated inode argument in vfs only. Unless it is generally conceivable
> > that other callers will want to pass a non-NULL inode argument over time
> > it might make more sense to add vfs_<operation>_delegated() or
> > __vfs_<operation>() and make vfs_mknod() and friends exported wrappers
> > around it.
> >
> > I mean it's a matter of preference ultimately but this seems cleaner to
> > me. So at least for the new ones we should consider it. Would also make
> > the patch smaller.
> >
>
> Good suggestion. I just respun along those lines and it's a lot cleaner.
> I'm still testing it but here is the new diffstat. It's a little larger
> actually, but it keeps the changes more confined to namei.c:
Sounds good to me!