A new fscache-based shared domain mode is going to be introduced for
erofs. In this mode, identical data blobs in the same domain are shared
and reused to reduce on-disk space usage.
As a first step, use a pseudo mount to manage and maintain each domain's
lifecycle.
The implementation of sharing blobs will be introduced in subsequent
patches.
Signed-off-by: Jia Zhu <[email protected]>
---
fs/erofs/Makefile | 2 +-
fs/erofs/domain.c | 115 ++++++++++++++++++++++++++++++++++++++++++++
fs/erofs/fscache.c | 10 +++-
fs/erofs/internal.h | 20 +++++++-
fs/erofs/super.c | 17 ++++---
5 files changed, 154 insertions(+), 10 deletions(-)
create mode 100644 fs/erofs/domain.c
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index 99bbc597a3e9..a4af7ecf636f 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -5,4 +5,4 @@ erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
-erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
+erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o domain.o
diff --git a/fs/erofs/domain.c b/fs/erofs/domain.c
new file mode 100644
index 000000000000..6461e4ee3582
--- /dev/null
+++ b/fs/erofs/domain.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Bytedance Inc. All rights reserved.
+ */
+
+#include <linux/pseudo_fs.h>
+#include <linux/fs_context.h>
+#include <linux/magic.h>
+#include <linux/fscache.h>
+
+#include "internal.h"
+
+/* Guards erofs_domain_list; a mutex, as domain setup sleeps (GFP_KERNEL). */
+static DEFINE_MUTEX(erofs_domain_list_lock);
+static LIST_HEAD(erofs_domain_list);
+
+/* Take one more reference on @domain; a NULL @domain is ignored. */
+void erofs_fscache_domain_get(struct erofs_domain *domain)
+{
+	if (domain)
+		refcount_inc(&domain->ref);
+}
+
+/* Drop a reference on @domain (NULL is ignored); the last put destroys it. */
+void erofs_fscache_domain_put(struct erofs_domain *domain)
+{
+	if (!domain)
+		return;
+	/* Dec and unlink under one lock so a lookup can't revive a dead domain. */
+	mutex_lock(&erofs_domain_list_lock);
+	if (!refcount_dec_and_test(&domain->ref)) {
+		mutex_unlock(&erofs_domain_list_lock);
+		return;
+	}
+	list_del(&domain->list);
+	mutex_unlock(&erofs_domain_list_lock);
+	fscache_relinquish_volume(domain->volume, NULL, false);
+	kern_unmount(domain->mnt);
+	kfree(domain->domain_id);
+	kfree(domain);
+}
+
+static int anon_inodefs_init_fs_context(struct fs_context *fc)
+{
+	return init_pseudo(fc, ANON_INODE_FS_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type anon_inode_fs_type = {
+	.name = "pseudo_domainfs",
+	.init_fs_context = anon_inodefs_init_fs_context,
+	.kill_sb = kill_anon_super,
+};
+
+/* Create and list the domain for @sb; erofs_domain_list_lock must be held. */
+static int erofs_fscache_init_domain(struct super_block *sb)
+{
+	int err = -ENOMEM;
+	struct erofs_domain *domain;
+	struct vfsmount *pseudo_mnt;
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return -ENOMEM;
+	domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL);
+	if (!domain->domain_id)
+		goto out;
+	/* erofs_fscache_register_fs() names the volume after sbi->domain. */
+	sbi->domain = domain;
+	pseudo_mnt = kern_mount(&anon_inode_fs_type);
+	if (IS_ERR(pseudo_mnt)) {
+		err = PTR_ERR(pseudo_mnt);
+		goto out;
+	}
+	err = erofs_fscache_register_fs(sb);
+	if (err) {
+		kern_unmount(pseudo_mnt);
+		goto out;
+	}
+
+	domain->mnt = pseudo_mnt;
+	domain->volume = sbi->volume;
+	refcount_set(&domain->ref, 1);
+	mutex_init(&domain->mutex);
+	pseudo_mnt->mnt_sb->s_fs_info = domain;
+	list_add(&domain->list, &erofs_domain_list);
+	return 0;
+out:
+	kfree(domain->domain_id);
+	kfree(domain);
+	sbi->domain = NULL;
+	return err;
+}
+
+/* Join the domain named sbi->opt.domain_id, creating it on first use. */
+int erofs_fscache_register_domain(struct super_block *sb)
+{
+	int err = 0;
+	struct erofs_domain *domain;
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+	mutex_lock(&erofs_domain_list_lock);
+	list_for_each_entry(domain, &erofs_domain_list, list) {
+		if (strcmp(domain->domain_id, sbi->opt.domain_id))
+			continue;
+		erofs_fscache_domain_get(domain);
+		sbi->domain = domain;
+		sbi->volume = domain->volume;
+		goto out;
+	}
+	err = erofs_fscache_init_domain(sb);
+out:
+	mutex_unlock(&erofs_domain_list_lock);
+	return err;
+}
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 8e01d89c3319..5c918a06ae9a 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -495,7 +495,8 @@ int erofs_fscache_register_fs(struct super_block *sb)
char *name;
int ret = 0;
- name = kasprintf(GFP_KERNEL, "erofs,%s", sbi->opt.fsid);
+ name = kasprintf(GFP_KERNEL, "erofs,%s",
+ sbi->domain ? sbi->domain->domain_id : sbi->opt.fsid);
if (!name)
return -ENOMEM;
@@ -515,6 +516,11 @@ void erofs_fscache_unregister_fs(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
- fscache_relinquish_volume(sbi->volume, NULL, false);
+ if (sbi->domain)
+ erofs_fscache_domain_put(sbi->domain);
+ else
+ fscache_relinquish_volume(sbi->volume, NULL, false);
+
sbi->volume = NULL;
+ sbi->domain = NULL;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index fe435d077f1a..bca4e9c57890 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -99,6 +99,15 @@ struct erofs_sb_lz4_info {
u16 max_pclusterblks;
};
+struct erofs_domain { /* one shared fscache domain, looked up by domain_id */
+ refcount_t ref; /* set to 1 at creation; changed by erofs_fscache_domain_get/put */
+ struct mutex mutex; /* initialised at creation; no user visible in this patch */
+ struct vfsmount *mnt; /* pseudo mount created with kern_mount() for this domain */
+ struct list_head list; /* link in the global erofs_domain_list */
+ struct fscache_volume *volume; /* volume copied into sbi->volume of each sb that joins */
+ char *domain_id; /* kstrdup()'d copy of the domain_id mount option */
+};
+
struct erofs_fscache {
struct fscache_cookie *cookie;
struct inode *inode;
@@ -158,6 +167,7 @@ struct erofs_sb_info {
/* fscache support */
struct fscache_volume *volume;
struct erofs_fscache *s_fscache;
+ struct erofs_domain *domain;
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
@@ -608,8 +618,11 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb,
/* fscache.c */
#ifdef CONFIG_EROFS_FS_ONDEMAND
+void erofs_fscache_domain_get(struct erofs_domain *domain);
+void erofs_fscache_domain_put(struct erofs_domain *domain);
int erofs_fscache_register_fs(struct super_block *sb);
void erofs_fscache_unregister_fs(struct super_block *sb);
+int erofs_fscache_register_domain(struct super_block *sb);
int erofs_fscache_register_cookie(struct super_block *sb,
struct erofs_fscache **fscache,
@@ -620,10 +633,15 @@ extern const struct address_space_operations erofs_fscache_access_aops;
#else
static inline int erofs_fscache_register_fs(struct super_block *sb)
{
- return 0;
+ return -EOPNOTSUPP;
}
static inline void erofs_fscache_unregister_fs(struct super_block *sb) {}
+static inline int erofs_fscache_register_domain(struct super_block *sb)
+{
+	return -EOPNOTSUPP;	/* stub used when CONFIG_EROFS_FS_ONDEMAND is off */
+}
+
static inline int erofs_fscache_register_cookie(struct super_block *sb,
struct erofs_fscache **fscache,
char *name, bool need_inode)
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index fb5a84a07bd5..55d2343c18a4 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -715,12 +715,17 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_blocksize = EROFS_BLKSIZ;
sb->s_blocksize_bits = LOG_BLOCK_SIZE;
- err = erofs_fscache_register_fs(sb);
- if (err)
- return err;
-
- err = erofs_fscache_register_cookie(sb, &sbi->s_fscache,
- sbi->opt.fsid, true);
+ if (sbi->opt.domain_id) {
+ err = erofs_fscache_register_domain(sb);
+ if (err)
+ return err;
+ } else {
+ err = erofs_fscache_register_fs(sb);
+ if (err)
+ return err;
+ err = erofs_fscache_register_cookie(sb, &sbi->s_fscache,
+ sbi->opt.fsid, true);
+ }
if (err)
return err;
--
2.20.1
On Wed, Aug 31, 2022 at 08:31:22PM +0800, Jia Zhu wrote:
> A new fscache-based shared domain mode is going to be introduced for
> erofs. In which case, same data blobs in same domain will be shared
> and reused to reduce on-disk space usage.
>
> As the first step, we use pseudo mnt to manage and maintain domain's
> lifecycle.
>
> The implementation of sharing blobs will be introduced in subsequent
> patches.
>
> Signed-off-by: Jia Zhu <[email protected]>
> ---
> fs/erofs/Makefile | 2 +-
> fs/erofs/domain.c | 115 ++++++++++++++++++++++++++++++++++++++++++++
> fs/erofs/fscache.c | 10 +++-
> fs/erofs/internal.h | 20 +++++++-
> fs/erofs/super.c | 17 ++++---
> 5 files changed, 154 insertions(+), 10 deletions(-)
> create mode 100644 fs/erofs/domain.c
>
> diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
> index 99bbc597a3e9..a4af7ecf636f 100644
> --- a/fs/erofs/Makefile
> +++ b/fs/erofs/Makefile
> @@ -5,4 +5,4 @@ erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o
> erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
> erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
> erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
> -erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
> +erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o domain.o
> diff --git a/fs/erofs/domain.c b/fs/erofs/domain.c
> new file mode 100644
> index 000000000000..6461e4ee3582
> --- /dev/null
> +++ b/fs/erofs/domain.c
`domain` is still designed entirely for the fscache backend.
I'd suggest moving the code below into fscache.c for now, until we
find use cases beyond fscache.
> @@ -0,0 +1,115 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2022, Bytedance Inc. All rights reserved.
You could move this line to fscache.c as well.
> + */
> +
> +#include <linux/pseudo_fs.h>
> +#include <linux/fs_context.h>
> +#include <linux/magic.h>
> +#include <linux/fscache.h>
> +
> +#include "internal.h"
> +
> +static DEFINE_SPINLOCK(erofs_domain_list_lock);
> +static LIST_HEAD(erofs_domain_list);
> +
> +void erofs_fscache_domain_get(struct erofs_domain *domain)
> +{
> + if (!domain)
> + return;
> + refcount_inc(&domain->ref);
> +}
> +
> +void erofs_fscache_domain_put(struct erofs_domain *domain)
> +{
> + if (!domain)
> + return;
> + if (refcount_dec_and_test(&domain->ref)) {
> + fscache_relinquish_volume(domain->volume, NULL, false);
> + spin_lock(&erofs_domain_list_lock);
> + list_del(&domain->list);
> + spin_unlock(&erofs_domain_list_lock);
> + kern_unmount(domain->mnt);
> + kfree(domain->domain_id);
> + kfree(domain);
> + }
> +}
> +
> +static int anon_inodefs_init_fs_context(struct fs_context *fc)
> +{
> + struct pseudo_fs_context *ctx = init_pseudo(fc, ANON_INODE_FS_MAGIC);
> +
> + if (!ctx)
> + return -ENOMEM;
> + return 0;
> +}
> +
> +static struct file_system_type anon_inode_fs_type = {
> + .name = "pseudo_domainfs",
> + .init_fs_context = anon_inodefs_init_fs_context,
> + .kill_sb = kill_anon_super,
> +};
Could we just use the erofs filesystem type with a special sb instead?
There is no need to create a mess like this.
Thanks,
Gao Xiang