Hi!
The following patches will make the cephfs kernel client behave the same
way as the fuse client when doing renames across different quota realms.
Luis Henriques (2):
ceph: normalize 'delta' parameter usage in check_quota_exceeded
ceph: allow rename operation under different quota realms
fs/ceph/dir.c | 9 +++----
fs/ceph/quota.c | 62 +++++++++++++++++++++++++++++++++++++++++++++----
fs/ceph/super.h | 3 ++-
3 files changed, 65 insertions(+), 9 deletions(-)
Function check_quota_exceeded() uses delta parameter only for the
QUOTA_CHECK_MAX_BYTES_OP operation. Using this parameter also for
MAX_FILES will makes the code cleaner and will be required to support
cross-quota-tree renames.
Signed-off-by: Luis Henriques <[email protected]>
---
fs/ceph/quota.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index de56dee60540..c5c8050f0f99 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -361,8 +361,6 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
spin_unlock(&ci->i_ceph_lock);
switch (op) {
case QUOTA_CHECK_MAX_FILES_OP:
- exceeded = (max && (rvalue >= max));
- break;
case QUOTA_CHECK_MAX_BYTES_OP:
exceeded = (max && (rvalue + delta > max));
break;
@@ -417,7 +415,7 @@ bool ceph_quota_is_max_files_exceeded(struct inode *inode)
WARN_ON(!S_ISDIR(inode->i_mode));
- return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
+ return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 1);
}
/*
Returning -EXDEV when trying to 'mv' files/directories from different
quota realms results in copy+unlink operations instead of the faster
CEPH_MDS_OP_RENAME. This will occur even when there aren't any quotas
set in the destination directory, or if there's enough space left for
the new file(s).
This patch adds a new helper function to be called on rename operations
which will allow these operations if they can be executed. This patch
mimics userland fuse client commit b8954e5734b3 ("client:
optimize rename operation under different quota root").
Since ceph_quota_is_same_realm() is now called only from this new
helper, make it static.
URL: https://tracker.ceph.com/issues/44791
Signed-off-by: Luis Henriques <[email protected]>
---
fs/ceph/dir.c | 9 ++++----
fs/ceph/quota.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-
fs/ceph/super.h | 3 ++-
3 files changed, 64 insertions(+), 6 deletions(-)
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index d0cd0aba5843..9d3f0062d800 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1099,11 +1099,12 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP;
else
return -EROFS;
+ } else {
+ err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
+ new_dir);
+ if (err)
+ return err;
}
- /* don't allow cross-quota renames */
- if ((old_dir != new_dir) &&
- (!ceph_quota_is_same_realm(old_dir, new_dir)))
- return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index c5c8050f0f99..a6dd1a528c70 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -264,7 +264,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
return NULL;
}
-bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
+static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{
struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
struct ceph_snap_realm *old_realm, *new_realm;
@@ -516,3 +516,59 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
return is_updated;
}
+/*
+ * ceph_quota_check_rename - check if a rename can be executed
+ * @mdsc: MDS client instance
+ * @old: inode to be copied
+ * @new: destination inode (directory)
+ *
+ * This function verifies if a rename (e.g. moving a file or directory) can be
+ * executed. It forces an rstat update in the @new target directory (and in the
+ * source @old as well, if it's a directory). The actual check is done both for
+ * max_files and max_bytes.
+ *
+ * This function returns 0 if it's OK to do the rename, or, if quotas are
+ * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
+ */
+int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
+ struct inode *old, struct inode *new)
+{
+ struct ceph_inode_info *ci_old = ceph_inode(old);
+ int ret = 0;
+
+ if ((old == new) || (ceph_quota_is_same_realm(old, new)))
+ return 0;
+
+ /*
+ * Get the latest rstat for target directory (and for source, if a
+ * directory)
+ */
+ ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
+ if (ret)
+ return ret;
+
+ if (S_ISDIR(old->i_mode)) {
+ ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
+ if (ret)
+ return ret;
+ ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
+ ci_old->i_rbytes);
+ if (!ret)
+ ret = check_quota_exceeded(new,
+ QUOTA_CHECK_MAX_FILES_OP,
+ ci_old->i_rfiles +
+ ci_old->i_rsubdirs);
+ if (ret)
+ ret = -EXDEV;
+ } else {
+ ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
+ i_size_read(old));
+ if (!ret)
+ ret = check_quota_exceeded(new,
+ QUOTA_CHECK_MAX_FILES_OP, 1);
+ if (ret)
+ ret = -EDQUOT;
+ }
+
+ return ret;
+}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 037cdfb2ad4f..d5853831a6b5 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1175,13 +1175,14 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
-extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
+extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
+ struct inode *old, struct inode *new);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */
On Mon, 2020-04-06 at 16:12 +0100, Luis Henriques wrote:
> Returning -EXDEV when trying to 'mv' files/directories from different
> quota realms results in copy+unlink operations instead of the faster
> CEPH_MDS_OP_RENAME. This will occur even when there aren't any quotas
> set in the destination directory, or if there's enough space left for
> the new file(s).
>
> This patch adds a new helper function to be called on rename operations
> which will allow these operations if they can be executed. This patch
> mimics userland fuse client commit b8954e5734b3 ("client:
> optimize rename operation under different quota root").
>
> Since ceph_quota_is_same_realm() is now called only from this new
> helper, make it static.
>
> URL: https://tracker.ceph.com/issues/44791
> Signed-off-by: Luis Henriques <[email protected]>
> ---
> fs/ceph/dir.c | 9 ++++----
> fs/ceph/quota.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/ceph/super.h | 3 ++-
> 3 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index d0cd0aba5843..9d3f0062d800 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -1099,11 +1099,12 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
> op = CEPH_MDS_OP_RENAMESNAP;
> else
> return -EROFS;
> + } else {
> + err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
> + new_dir);
I was wondering why not use "old_dir" here, but I think this is more
correct. I guess a directory could have a different quotarealm from its
parent?
> + if (err)
> + return err;
> }
> - /* don't allow cross-quota renames */
> - if ((old_dir != new_dir) &&
> - (!ceph_quota_is_same_realm(old_dir, new_dir)))
> - return -EXDEV;
>
> dout("rename dir %p dentry %p to dir %p dentry %p\n",
> old_dir, old_dentry, new_dir, new_dentry);
> diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
> index c5c8050f0f99..a6dd1a528c70 100644
> --- a/fs/ceph/quota.c
> +++ b/fs/ceph/quota.c
> @@ -264,7 +264,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
> return NULL;
> }
>
> -bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
> +static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
> {
> struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
> struct ceph_snap_realm *old_realm, *new_realm;
> @@ -516,3 +516,59 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
> return is_updated;
> }
>
> +/*
> + * ceph_quota_check_rename - check if a rename can be executed
> + * @mdsc: MDS client instance
> + * @old: inode to be copied
> + * @new: destination inode (directory)
> + *
> + * This function verifies if a rename (e.g. moving a file or directory) can be
> + * executed. It forces an rstat update in the @new target directory (and in the
> + * source @old as well, if it's a directory). The actual check is done both for
> + * max_files and max_bytes.
> + *
> + * This function returns 0 if it's OK to do the rename, or, if quotas are
> + * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
> + */
> +int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
> + struct inode *old, struct inode *new)
> +{
> + struct ceph_inode_info *ci_old = ceph_inode(old);
> + int ret = 0;
> +
> + if ((old == new) || (ceph_quota_is_same_realm(old, new)))
> + return 0;
> +
"old" represents the old dentry being moved. "new" is the new parent
dir. Do we need to test for old == new? The vfs won't allow the source
to be the ancestor of the target (or vice versa). From vfs_rename():
/* source should not be ancestor of target */
error = -EINVAL;
if (old_dentry == trap)
goto exit5;
/* target should not be an ancestor of source */
if (!(flags & RENAME_EXCHANGE))
error = -ENOTEMPTY;
> + /*
> + * Get the latest rstat for target directory (and for source, if a
> + * directory)
> + */
> + ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
> + if (ret)
> + return ret;
> +
> + if (S_ISDIR(old->i_mode)) {
> + ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
> + if (ret)
> + return ret;
> + ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
> + ci_old->i_rbytes);
> + if (!ret)
> + ret = check_quota_exceeded(new,
> + QUOTA_CHECK_MAX_FILES_OP,
> + ci_old->i_rfiles +
> + ci_old->i_rsubdirs);
> + if (ret)
> + ret = -EXDEV;
> + } else {
> + ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
> + i_size_read(old));
> + if (!ret)
> + ret = check_quota_exceeded(new,
> + QUOTA_CHECK_MAX_FILES_OP, 1);
> + if (ret)
> + ret = -EDQUOT;
> + }
> +
> + return ret;
> +}
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 037cdfb2ad4f..d5853831a6b5 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1175,13 +1175,14 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
> struct ceph_mds_session *session,
> struct ceph_msg *msg);
> extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
> -extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
> extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
> loff_t newlen);
> extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
> loff_t newlen);
> extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
> struct kstatfs *buf);
> +extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
> + struct inode *old, struct inode *new);
> extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
>
> #endif /* _FS_CEPH_SUPER_H */
Looks good otherwise. Nice work!
--
Jeff Layton <[email protected]>
On Mon, Apr 06, 2020 at 04:05:01PM -0400, Jeff Layton wrote:
> On Mon, 2020-04-06 at 16:12 +0100, Luis Henriques wrote:
> > Returning -EXDEV when trying to 'mv' files/directories from different
> > quota realms results in copy+unlink operations instead of the faster
> > CEPH_MDS_OP_RENAME. This will occur even when there aren't any quotas
> > set in the destination directory, or if there's enough space left for
> > the new file(s).
> >
> > This patch adds a new helper function to be called on rename operations
> > which will allow these operations if they can be executed. This patch
> > mimics userland fuse client commit b8954e5734b3 ("client:
> > optimize rename operation under different quota root").
> >
> > Since ceph_quota_is_same_realm() is now called only from this new
> > helper, make it static.
> >
> > URL: https://tracker.ceph.com/issues/44791
> > Signed-off-by: Luis Henriques <[email protected]>
> > ---
> > fs/ceph/dir.c | 9 ++++----
> > fs/ceph/quota.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-
> > fs/ceph/super.h | 3 ++-
> > 3 files changed, 64 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index d0cd0aba5843..9d3f0062d800 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -1099,11 +1099,12 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
> > op = CEPH_MDS_OP_RENAMESNAP;
> > else
> > return -EROFS;
> > + } else {
> > + err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
> > + new_dir);
>
> I was wondering why not use "old_dir" here, but I think this is more
> correct. I guess a directory could have a different quotarealm from its
> parent?
Yes, you can set, for example, ceph.quota.max_files to in a directory, and
set it again in a subdirectory; this subdirectory will now be on a
different quotarealm.
>
> > + if (err)
> > + return err;
> > }
> > - /* don't allow cross-quota renames */
> > - if ((old_dir != new_dir) &&
> > - (!ceph_quota_is_same_realm(old_dir, new_dir)))
> > - return -EXDEV;
> >
> > dout("rename dir %p dentry %p to dir %p dentry %p\n",
> > old_dir, old_dentry, new_dir, new_dentry);
> > diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
> > index c5c8050f0f99..a6dd1a528c70 100644
> > --- a/fs/ceph/quota.c
> > +++ b/fs/ceph/quota.c
> > @@ -264,7 +264,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
> > return NULL;
> > }
> >
> > -bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
> > +static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
> > {
> > struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
> > struct ceph_snap_realm *old_realm, *new_realm;
> > @@ -516,3 +516,59 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
> > return is_updated;
> > }
> >
> > +/*
> > + * ceph_quota_check_rename - check if a rename can be executed
> > + * @mdsc: MDS client instance
> > + * @old: inode to be copied
> > + * @new: destination inode (directory)
> > + *
> > + * This function verifies if a rename (e.g. moving a file or directory) can be
> > + * executed. It forces an rstat update in the @new target directory (and in the
> > + * source @old as well, if it's a directory). The actual check is done both for
> > + * max_files and max_bytes.
> > + *
> > + * This function returns 0 if it's OK to do the rename, or, if quotas are
> > + * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
> > + */
> > +int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
> > + struct inode *old, struct inode *new)
> > +{
> > + struct ceph_inode_info *ci_old = ceph_inode(old);
> > + int ret = 0;
> > +
> > + if ((old == new) || (ceph_quota_is_same_realm(old, new)))
> > + return 0;
> > +
>
> "old" represents the old dentry being moved. "new" is the new parent
> dir. Do we need to test for old == new? The vfs won't allow the source
> to be the ancestor of the target (or vice versa). From vfs_rename():
>
> /* source should not be ancestor of target */
> error = -EINVAL;
> if (old_dentry == trap)
> goto exit5;
> /* target should not be an ancestor of source */
> if (!(flags & RENAME_EXCHANGE))
> error = -ENOTEMPTY;
>
Err... yeah, that 'old == new' is a mistake. I just wanted to keep the
optimization in the original code:
if ((old_dir != new_dir) &&
(!ceph_quota_is_same_realm(old_dir, new_dir)))
return -EXDEV;
I'll send out v2 in a minute with the following change to ceph_rename:
} else if (old_dir != new_dir) {
err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
new_dir);
Cheers,
--
Lu?s
> > + /*
> > + * Get the latest rstat for target directory (and for source, if a
> > + * directory)
> > + */
> > + ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
> > + if (ret)
> > + return ret;
> > +
> > + if (S_ISDIR(old->i_mode)) {
> > + ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
> > + if (ret)
> > + return ret;
> > + ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
> > + ci_old->i_rbytes);
> > + if (!ret)
> > + ret = check_quota_exceeded(new,
> > + QUOTA_CHECK_MAX_FILES_OP,
> > + ci_old->i_rfiles +
> > + ci_old->i_rsubdirs);
> > + if (ret)
> > + ret = -EXDEV;
> > + } else {
> > + ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
> > + i_size_read(old));
> > + if (!ret)
> > + ret = check_quota_exceeded(new,
> > + QUOTA_CHECK_MAX_FILES_OP, 1);
> > + if (ret)
> > + ret = -EDQUOT;
> > + }
> > +
> > + return ret;
> > +}
> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > index 037cdfb2ad4f..d5853831a6b5 100644
> > --- a/fs/ceph/super.h
> > +++ b/fs/ceph/super.h
> > @@ -1175,13 +1175,14 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
> > struct ceph_mds_session *session,
> > struct ceph_msg *msg);
> > extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
> > -extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
> > extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
> > loff_t newlen);
> > extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
> > loff_t newlen);
> > extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
> > struct kstatfs *buf);
> > +extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
> > + struct inode *old, struct inode *new);
> > extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
> >
> > #endif /* _FS_CEPH_SUPER_H */
>
> Looks good otherwise. Nice work!
> --
> Jeff Layton <[email protected]>
>