2024-03-07 02:44:59

by Baokun Li

[permalink] [raw]
Subject: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

Lockdep reported the following issue when mounting erofs with a domain_id:

============================================
WARNING: possible recursive locking detected
6.8.0-rc7-xfstests #521 Not tainted
--------------------------------------------
mount/396 is trying to acquire lock:
ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
at: alloc_super+0xe3/0x3d0

but task is already holding lock:
ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
at: alloc_super+0xe3/0x3d0

other info that might help us debug this:
Possible unsafe locking scenario:

CPU0
----
lock(&type->s_umount_key#50/1);
lock(&type->s_umount_key#50/1);

*** DEADLOCK ***

May be due to missing lock nesting notation

2 locks held by mount/396:
#0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
at: alloc_super+0xe3/0x3d0
#1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
at: erofs_fscache_register_fs+0x3d/0x270 [erofs]

stack backtrace:
CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
Call Trace:
<TASK>
dump_stack_lvl+0x64/0xb0
validate_chain+0x5c4/0xa00
__lock_acquire+0x6a9/0xd50
lock_acquire+0xcd/0x2b0
down_write_nested+0x45/0xd0
alloc_super+0xe3/0x3d0
sget_fc+0x62/0x2f0
vfs_get_super+0x21/0x90
vfs_get_tree+0x2c/0xf0
fc_mount+0x12/0x40
vfs_kern_mount.part.0+0x75/0x90
kern_mount+0x24/0x40
erofs_fscache_register_fs+0x1ef/0x270 [erofs]
erofs_fc_fill_super+0x213/0x380 [erofs]

This is because the file_system_type of both erofs and the pseudo-mount
point of domain_id is erofs_fs_type, so two successive calls to
alloc_super() are considered to be using the same lock and trigger the
warning above.

Therefore add a nodev file_system_type named erofs_anon_fs_type to
silence this complaint. In addition, to reduce code coupling, refactor
out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb() functions
and move the erofs_pseudo_mnt related code to fscache.c.

Signed-off-by: Baokun Li <[email protected]>
---
fs/erofs/fscache.c | 48 ++++++++++++++++++++++++++++++++++++++++++++-
fs/erofs/internal.h | 10 +++++++++-
fs/erofs/super.c | 37 ++++++++--------------------------
3 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 89a7c2453aae..a346884a2c00 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -4,6 +4,7 @@
* Copyright (C) 2022, Bytedance Inc. All rights reserved.
*/
#include <linux/fscache.h>
+#include <linux/fs_context.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
@@ -12,6 +13,51 @@ static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

+static int
+erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
+{
+ static const struct tree_descr empty_descr = {""};
+
+ return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
+}
+
+static int erofs_fc_anon_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
+}
+
+static const struct fs_context_operations erofs_anon_context_ops = {
+ .get_tree = erofs_fc_anon_get_tree,
+};
+
+static int erofs_anon_init_fs_context(struct fs_context *fc)
+{
+ fc->ops = &erofs_anon_context_ops;
+ return 0;
+}
+
+static void erofs_kill_pseudo_sb(struct super_block *sb)
+{
+ kill_anon_super(sb);
+}
+
+static struct file_system_type erofs_anon_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "pseudo_erofs",
+ .init_fs_context = erofs_anon_init_fs_context,
+ .kill_sb = erofs_kill_pseudo_sb,
+};
+
+int erofs_anon_register_fs(void)
+{
+ return register_filesystem(&erofs_anon_fs_type);
+}
+
+void erofs_anon_unregister_fs(void)
+{
+ unregister_filesystem(&erofs_anon_fs_type);
+}
+
struct erofs_fscache_request {
struct erofs_fscache_request *primary;
struct netfs_cache_resources cache_resources;
@@ -381,7 +427,7 @@ static int erofs_fscache_init_domain(struct super_block *sb)
goto out;

if (!erofs_pseudo_mnt) {
- struct vfsmount *mnt = kern_mount(&erofs_fs_type);
+ struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
goto out;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 0f0706325b7b..d9e30ccceb39 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -385,7 +385,6 @@ struct erofs_map_dev {
unsigned int m_deviceid;
};

-extern struct file_system_type erofs_fs_type;
extern const struct super_operations erofs_sops;

extern const struct address_space_operations erofs_raw_access_aops;
@@ -507,6 +506,9 @@ static inline int z_erofs_deflate_exit(void) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP_DEFLATE */

#ifdef CONFIG_EROFS_FS_ONDEMAND
+int erofs_anon_register_fs(void);
+void erofs_anon_unregister_fs(void);
+
int erofs_fscache_register_fs(struct super_block *sb);
void erofs_fscache_unregister_fs(struct super_block *sb);

@@ -514,6 +516,12 @@ struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
char *name, unsigned int flags);
void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache);
#else
+static inline int erofs_anon_register_fs(void)
+{
+ return 0;
+}
+static inline void erofs_anon_unregister_fs(void) {}
+
static inline int erofs_fscache_register_fs(struct super_block *sb)
{
return -EOPNOTSUPP;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 9b4b66dcdd4f..a745010e9fe6 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -579,13 +579,6 @@ static const struct export_operations erofs_export_ops = {
.get_parent = erofs_get_parent,
};

-static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
-{
- static const struct tree_descr empty_descr = {""};
-
- return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
-}
-
static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct inode *inode;
@@ -712,11 +705,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return 0;
}

-static int erofs_fc_anon_get_tree(struct fs_context *fc)
-{
- return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
-}
-
static int erofs_fc_get_tree(struct fs_context *fc)
{
struct erofs_fs_context *ctx = fc->fs_private;
@@ -789,20 +777,10 @@ static const struct fs_context_operations erofs_context_ops = {
.free = erofs_fc_free,
};

-static const struct fs_context_operations erofs_anon_context_ops = {
- .get_tree = erofs_fc_anon_get_tree,
-};
-
static int erofs_init_fs_context(struct fs_context *fc)
{
struct erofs_fs_context *ctx;

- /* pseudo mount for anon inodes */
- if (fc->sb_flags & SB_KERNMOUNT) {
- fc->ops = &erofs_anon_context_ops;
- return 0;
- }
-
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
@@ -824,12 +802,6 @@ static void erofs_kill_sb(struct super_block *sb)
{
struct erofs_sb_info *sbi;

- /* pseudo mount for anon inodes */
- if (sb->s_flags & SB_KERNMOUNT) {
- kill_anon_super(sb);
- return;
- }
-
if (erofs_is_fscache_mode(sb))
kill_anon_super(sb);
else
@@ -868,7 +840,7 @@ static void erofs_put_super(struct super_block *sb)
erofs_fscache_unregister_fs(sb);
}

-struct file_system_type erofs_fs_type = {
+static struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
.init_fs_context = erofs_init_fs_context,
@@ -911,6 +883,10 @@ static int __init erofs_module_init(void)
if (err)
goto sysfs_err;

+ err = erofs_anon_register_fs();
+ if (err)
+ goto anon_err;
+
err = register_filesystem(&erofs_fs_type);
if (err)
goto fs_err;
@@ -918,6 +894,8 @@ static int __init erofs_module_init(void)
return 0;

fs_err:
+ erofs_anon_unregister_fs();
+anon_err:
erofs_exit_sysfs();
sysfs_err:
z_erofs_exit_zip_subsystem();
@@ -935,6 +913,7 @@ static int __init erofs_module_init(void)
static void __exit erofs_module_exit(void)
{
unregister_filesystem(&erofs_fs_type);
+ erofs_anon_unregister_fs();

/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
rcu_barrier();
--
2.31.1



2024-03-07 02:53:24

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

Hi Baokun,

On 2024/3/7 10:44, Baokun Li wrote:
> Lockdep reported the following issue when mounting erofs with a domain_id:
>
> ============================================
> WARNING: possible recursive locking detected
> 6.8.0-rc7-xfstests #521 Not tainted
> --------------------------------------------
> mount/396 is trying to acquire lock:
> ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
> at: alloc_super+0xe3/0x3d0
>
> but task is already holding lock:
> ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
> at: alloc_super+0xe3/0x3d0
>
> other info that might help us debug this:
> Possible unsafe locking scenario:
>
> CPU0
> ----
> lock(&type->s_umount_key#50/1);
> lock(&type->s_umount_key#50/1);
>
> *** DEADLOCK ***
>
> May be due to missing lock nesting notation
>
> 2 locks held by mount/396:
> #0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
> at: alloc_super+0xe3/0x3d0
> #1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
> at: erofs_fscache_register_fs+0x3d/0x270 [erofs]
>
> stack backtrace:
> CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
> Call Trace:
> <TASK>
> dump_stack_lvl+0x64/0xb0
> validate_chain+0x5c4/0xa00
> __lock_acquire+0x6a9/0xd50
> lock_acquire+0xcd/0x2b0
> down_write_nested+0x45/0xd0
> alloc_super+0xe3/0x3d0
> sget_fc+0x62/0x2f0
> vfs_get_super+0x21/0x90
> vfs_get_tree+0x2c/0xf0
> fc_mount+0x12/0x40
> vfs_kern_mount.part.0+0x75/0x90
> kern_mount+0x24/0x40
> erofs_fscache_register_fs+0x1ef/0x270 [erofs]
> erofs_fc_fill_super+0x213/0x380 [erofs]
>
> This is because the file_system_type of both erofs and the pseudo-mount
> point of domain_id is erofs_fs_type, so two successive calls to
> alloc_super() are considered to be using the same lock and trigger the
> warning above.
>
> Therefore add a nodev file_system_type named erofs_anon_fs_type to
> silence this complaint. In addition, to reduce code coupling, refactor
> out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb() functions
> and move the erofs_pseudo_mnt related code to fscache.c.
>
> Signed-off-by: Baokun Li <[email protected]>

IMHO, in the beginning, I'd like to avoid introducing another fs type
for erofs to share (meta)data between filesystems since it will cause
churn, could we use some alternative way to resolve this?

Or Jingbo might have some other ideas?

Thanks,
Gao Xiang

2024-03-07 03:31:31

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

Hi Xiang,

On 2024/3/7 10:52, Gao Xiang wrote:
> Hi Baokun,
>
> On 2024/3/7 10:44, Baokun Li wrote:
>> Lockdep reported the following issue when mounting erofs with a
>> domain_id:
>>
>> ============================================
>> WARNING: possible recursive locking detected
>> 6.8.0-rc7-xfstests #521 Not tainted
>> --------------------------------------------
>> mount/396 is trying to acquire lock:
>> ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>                         at: alloc_super+0xe3/0x3d0
>>
>> but task is already holding lock:
>> ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>                         at: alloc_super+0xe3/0x3d0
>>
>> other info that might help us debug this:
>>   Possible unsafe locking scenario:
>>
>>         CPU0
>>         ----
>>    lock(&type->s_umount_key#50/1);
>>    lock(&type->s_umount_key#50/1);
>>
>>   *** DEADLOCK ***
>>
>>   May be due to missing lock nesting notation
>>
>> 2 locks held by mount/396:
>>   #0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>             at: alloc_super+0xe3/0x3d0
>>   #1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
>>             at: erofs_fscache_register_fs+0x3d/0x270 [erofs]
>>
>> stack backtrace:
>> CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
>> Call Trace:
>>   <TASK>
>>   dump_stack_lvl+0x64/0xb0
>>   validate_chain+0x5c4/0xa00
>>   __lock_acquire+0x6a9/0xd50
>>   lock_acquire+0xcd/0x2b0
>>   down_write_nested+0x45/0xd0
>>   alloc_super+0xe3/0x3d0
>>   sget_fc+0x62/0x2f0
>>   vfs_get_super+0x21/0x90
>>   vfs_get_tree+0x2c/0xf0
>>   fc_mount+0x12/0x40
>>   vfs_kern_mount.part.0+0x75/0x90
>>   kern_mount+0x24/0x40
>>   erofs_fscache_register_fs+0x1ef/0x270 [erofs]
>>   erofs_fc_fill_super+0x213/0x380 [erofs]
>>
>> This is because the file_system_type of both erofs and the pseudo-mount
>> point of domain_id is erofs_fs_type, so two successive calls to
>> alloc_super() are considered to be using the same lock and trigger the
>> warning above.
>>
>> Therefore add a nodev file_system_type named erofs_anon_fs_type to
>> silence this complaint. In addition, to reduce code coupling, refactor
>> out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb()
>> functions
>> and move the erofs_pseudo_mnt related code to fscache.c.
>>
>> Signed-off-by: Baokun Li <[email protected]>
>
> IMHO, in the beginning, I'd like to avoid introducing another fs type
> for erofs to share (meta)data between filesystems since it will cause
> churn, could we use some alternative way to resolve this?
>
> Or Jingbo might have some other ideas?
>
> Thanks,
> Gao Xiang

The usual way to avoid this kind of false positive is to add a subclass to
the lock, but s_umount is allocated, initialised and locked in
alloc_super(),
so we can't find a place to set the subclass.

Alternatively, kern_mount(&erofs_fs_type) could be moved to
erofs_module_init() or erofs_fc_parse_param() to avoid s_umount nesting,
but that would have looked a bit strange.

So the final choice was to add a new file_system_type to avoid this false
positive. Since you don't like the idea of adding a new file_system_type,
do you think it would be ok to move kern_mount(&erofs_fs_type) to
erofs_module_init() or erofs_fc_parse_param()?

Thanks!
--
With Best Regards,
Baokun Li
.

2024-03-07 03:42:07

by Jingbo Xu

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

Hi Baokun,

Thanks for catching this!


On 3/7/24 10:52 AM, Gao Xiang wrote:
> Hi Baokun,
>
> On 2024/3/7 10:44, Baokun Li wrote:
>> Lockdep reported the following issue when mounting erofs with a
>> domain_id:
>>
>> ============================================
>> WARNING: possible recursive locking detected
>> 6.8.0-rc7-xfstests #521 Not tainted
>> --------------------------------------------
>> mount/396 is trying to acquire lock:
>> ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>                         at: alloc_super+0xe3/0x3d0
>>
>> but task is already holding lock:
>> ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>                         at: alloc_super+0xe3/0x3d0
>>
>> other info that might help us debug this:
>>   Possible unsafe locking scenario:
>>
>>         CPU0
>>         ----
>>    lock(&type->s_umount_key#50/1);
>>    lock(&type->s_umount_key#50/1);
>>
>>   *** DEADLOCK ***
>>
>>   May be due to missing lock nesting notation
>>
>> 2 locks held by mount/396:
>>   #0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>             at: alloc_super+0xe3/0x3d0
>>   #1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
>>             at: erofs_fscache_register_fs+0x3d/0x270 [erofs]
>>
>> stack backtrace:
>> CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
>> Call Trace:
>>   <TASK>
>>   dump_stack_lvl+0x64/0xb0
>>   validate_chain+0x5c4/0xa00
>>   __lock_acquire+0x6a9/0xd50
>>   lock_acquire+0xcd/0x2b0
>>   down_write_nested+0x45/0xd0
>>   alloc_super+0xe3/0x3d0
>>   sget_fc+0x62/0x2f0
>>   vfs_get_super+0x21/0x90
>>   vfs_get_tree+0x2c/0xf0
>>   fc_mount+0x12/0x40
>>   vfs_kern_mount.part.0+0x75/0x90
>>   kern_mount+0x24/0x40
>>   erofs_fscache_register_fs+0x1ef/0x270 [erofs]
>>   erofs_fc_fill_super+0x213/0x380 [erofs]
>>
>> This is because the file_system_type of both erofs and the pseudo-mount
>> point of domain_id is erofs_fs_type, so two successive calls to
>> alloc_super() are considered to be using the same lock and trigger the
>> warning above.
>>
>> Therefore add a nodev file_system_type named erofs_anon_fs_type to
>> silence this complaint. In addition, to reduce code coupling, refactor
>> out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb() functions
>> and move the erofs_pseudo_mnt related code to fscache.c.
>>
>> Signed-off-by: Baokun Li <[email protected]>
>
> IMHO, in the beginning, I'd like to avoid introducing another fs type
> for erofs to share (meta)data between filesystems since it will cause
> churn, could we use some alternative way to resolve this?

Yeah as Gao Xiang said, this is initially intended to avoid introducing
anothoer file_system_type, say erofs_anon_fs_type.

What we need is actually a method of allocating anonymous inode as a
sentinel identifying each blob. There is indeed a global mount, i.e.
anon_inode_mnt, for allocating anonymous inode/file specifically. At
the time the share domain feature is introduced, there's only one
anonymous inode, i.e. anon_inode_inode, and all the allocated anonymous
files are bound to this single anon_inode_inode. Thus we decided to
implement a erofs internal pseudo mount for this usage.

But I noticed that we can now allocate unique anonymous inodes from
anon_inode_mnt since commit e7e832c ("fs: add LSM-supporting anon-inode
interface"), though the new interface is initially for LSM usage.

--
Thanks,
Jingbo

2024-03-07 04:19:36

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

Hi,

(try to +Cc Christian and Al here...)

On 2024/3/7 11:41, Jingbo Xu wrote:
> Hi Baokun,
>
> Thanks for catching this!
>
>
> On 3/7/24 10:52 AM, Gao Xiang wrote:
>> Hi Baokun,
>>
>> On 2024/3/7 10:44, Baokun Li wrote:
>>> Lockdep reported the following issue when mounting erofs with a
>>> domain_id:
>>>
>>> ============================================
>>> WARNING: possible recursive locking detected
>>> 6.8.0-rc7-xfstests #521 Not tainted
>>> --------------------------------------------
>>> mount/396 is trying to acquire lock:
>>> ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>                         at: alloc_super+0xe3/0x3d0
>>>
>>> but task is already holding lock:
>>> ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>                         at: alloc_super+0xe3/0x3d0
>>>
>>> other info that might help us debug this:
>>>   Possible unsafe locking scenario:
>>>
>>>         CPU0
>>>         ----
>>>    lock(&type->s_umount_key#50/1);
>>>    lock(&type->s_umount_key#50/1);
>>>
>>>   *** DEADLOCK ***
>>>
>>>   May be due to missing lock nesting notation
>>>
>>> 2 locks held by mount/396:
>>>   #0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>             at: alloc_super+0xe3/0x3d0
>>>   #1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
>>>             at: erofs_fscache_register_fs+0x3d/0x270 [erofs]
>>>
>>> stack backtrace:
>>> CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
>>> Call Trace:
>>>   <TASK>
>>>   dump_stack_lvl+0x64/0xb0
>>>   validate_chain+0x5c4/0xa00
>>>   __lock_acquire+0x6a9/0xd50
>>>   lock_acquire+0xcd/0x2b0
>>>   down_write_nested+0x45/0xd0
>>>   alloc_super+0xe3/0x3d0
>>>   sget_fc+0x62/0x2f0
>>>   vfs_get_super+0x21/0x90
>>>   vfs_get_tree+0x2c/0xf0
>>>   fc_mount+0x12/0x40
>>>   vfs_kern_mount.part.0+0x75/0x90
>>>   kern_mount+0x24/0x40
>>>   erofs_fscache_register_fs+0x1ef/0x270 [erofs]
>>>   erofs_fc_fill_super+0x213/0x380 [erofs]
>>>
>>> This is because the file_system_type of both erofs and the pseudo-mount
>>> point of domain_id is erofs_fs_type, so two successive calls to
>>> alloc_super() are considered to be using the same lock and trigger the
>>> warning above.
>>>
>>> Therefore add a nodev file_system_type named erofs_anon_fs_type to
>>> silence this complaint. In addition, to reduce code coupling, refactor
>>> out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb() functions
>>> and move the erofs_pseudo_mnt related code to fscache.c.
>>>
>>> Signed-off-by: Baokun Li <[email protected]>
>>
>> IMHO, in the beginning, I'd like to avoid introducing another fs type
>> for erofs to share (meta)data between filesystems since it will cause
>> churn, could we use some alternative way to resolve this?
>
> Yeah as Gao Xiang said, this is initially intended to avoid introducing
> anothoer file_system_type, say erofs_anon_fs_type.
>
> What we need is actually a method of allocating anonymous inode as a
> sentinel identifying each blob. There is indeed a global mount, i.e.
> anon_inode_mnt, for allocating anonymous inode/file specifically. At
> the time the share domain feature is introduced, there's only one
> anonymous inode, i.e. anon_inode_inode, and all the allocated anonymous
> files are bound to this single anon_inode_inode. Thus we decided to
> implement a erofs internal pseudo mount for this usage.
>
> But I noticed that we can now allocate unique anonymous inodes from
> anon_inode_mnt since commit e7e832c ("fs: add LSM-supporting anon-inode
> interface"), though the new interface is initially for LSM usage.

Yes, as summary, EROFS now maintains a bunch of anon inodes among
all different filesystem instances, so that like

blob sharing or
page cache sharing across filesystems can be done.

In brief, I think the following patch is a good idea but it
hasn't been landed until now:
https://lore.kernel.org/r/[email protected]

Other than that, is it a good idea to introduce another fs type
(like erofs_anon_fs_type) for such usage?

It's much appreciated to get more inputs of this, thanks a lot!

Thanks,
Gao Xiang

>

2024-03-07 05:08:02

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On Thu, Mar 07, 2024 at 10:44:59AM +0800, Baokun Li wrote:

> +static int erofs_anon_init_fs_context(struct fs_context *fc)
> +{
> + fc->ops = &erofs_anon_context_ops;
> + return 0;
> +}


ITYM
struct pseudo_fs_context *ctx = init_pseudo(fc, EROFS_SUPER_MAGIC);
return ctx ? 0 : -ENOMEM;

and to hell with erofs_anon_context_ops, along with its fill_super, calls
of simple_fill_super(), etc. Unless I'm missing something, you are not
even creating dentries here, let alone making them possible to look up.

> +static void erofs_kill_pseudo_sb(struct super_block *sb)
> +{
> + kill_anon_super(sb);
> +}

*blink*

What's wrong with simply using kill_anon_super as ->kill_sb?

> +int erofs_anon_register_fs(void)
> +{
> + return register_filesystem(&erofs_anon_fs_type);
> +}

What for? The only thing it gives you is an ability to look it up by
name. Which is completely pointless, IMO,

> if (!erofs_pseudo_mnt) {
> - struct vfsmount *mnt = kern_mount(&erofs_fs_type);
> + struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);

.. since you are getting to it by direct reference to file_system_type
anyway. Same unregistering, of course...

2024-03-07 06:46:20

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On 2024/3/7 11:41, Jingbo Xu wrote:
> Hi Baokun,
>
> Thanks for catching this!
>
>
> On 3/7/24 10:52 AM, Gao Xiang wrote:
>> Hi Baokun,
>>
>> On 2024/3/7 10:44, Baokun Li wrote:
>>> Lockdep reported the following issue when mounting erofs with a
>>> domain_id:
>>>
>>> ============================================
>>> WARNING: possible recursive locking detected
>>> 6.8.0-rc7-xfstests #521 Not tainted
>>> --------------------------------------------
>>> mount/396 is trying to acquire lock:
>>> ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>                         at: alloc_super+0xe3/0x3d0
>>>
>>> but task is already holding lock:
>>> ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>                         at: alloc_super+0xe3/0x3d0
>>>
>>> other info that might help us debug this:
>>>   Possible unsafe locking scenario:
>>>
>>>         CPU0
>>>         ----
>>>    lock(&type->s_umount_key#50/1);
>>>    lock(&type->s_umount_key#50/1);
>>>
>>>   *** DEADLOCK ***
>>>
>>>   May be due to missing lock nesting notation
>>>
>>> 2 locks held by mount/396:
>>>   #0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>             at: alloc_super+0xe3/0x3d0
>>>   #1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
>>>             at: erofs_fscache_register_fs+0x3d/0x270 [erofs]
>>>
>>> stack backtrace:
>>> CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
>>> Call Trace:
>>>   <TASK>
>>>   dump_stack_lvl+0x64/0xb0
>>>   validate_chain+0x5c4/0xa00
>>>   __lock_acquire+0x6a9/0xd50
>>>   lock_acquire+0xcd/0x2b0
>>>   down_write_nested+0x45/0xd0
>>>   alloc_super+0xe3/0x3d0
>>>   sget_fc+0x62/0x2f0
>>>   vfs_get_super+0x21/0x90
>>>   vfs_get_tree+0x2c/0xf0
>>>   fc_mount+0x12/0x40
>>>   vfs_kern_mount.part.0+0x75/0x90
>>>   kern_mount+0x24/0x40
>>>   erofs_fscache_register_fs+0x1ef/0x270 [erofs]
>>>   erofs_fc_fill_super+0x213/0x380 [erofs]
>>>
>>> This is because the file_system_type of both erofs and the pseudo-mount
>>> point of domain_id is erofs_fs_type, so two successive calls to
>>> alloc_super() are considered to be using the same lock and trigger the
>>> warning above.
>>>
>>> Therefore add a nodev file_system_type named erofs_anon_fs_type to
>>> silence this complaint. In addition, to reduce code coupling, refactor
>>> out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb() functions
>>> and move the erofs_pseudo_mnt related code to fscache.c.
>>>
>>> Signed-off-by: Baokun Li <[email protected]>
>> IMHO, in the beginning, I'd like to avoid introducing another fs type
>> for erofs to share (meta)data between filesystems since it will cause
>> churn, could we use some alternative way to resolve this?
> Yeah as Gao Xiang said, this is initially intended to avoid introducing
> anothoer file_system_type, say erofs_anon_fs_type.
>
> What we need is actually a method of allocating anonymous inode as a
> sentinel identifying each blob. There is indeed a global mount, i.e.
> anon_inode_mnt, for allocating anonymous inode/file specifically. At
> the time the share domain feature is introduced, there's only one
> anonymous inode, i.e. anon_inode_inode, and all the allocated anonymous
> files are bound to this single anon_inode_inode. Thus we decided to
> implement a erofs internal pseudo mount for this usage.
>
> But I noticed that we can now allocate unique anonymous inodes from
> anon_inode_mnt since commit e7e832c ("fs: add LSM-supporting anon-inode
> interface"), though the new interface is initially for LSM usage.
>
Thank you for your feedback!

If I understand you correctly, you mean to remove erofs_pseudo_mnt
directly to avoid this false positive, and use anon_inode_create_getfile()
to create the required anonymous inode.

--
With Best Regards,
Baokun Li
.

2024-03-07 06:58:05

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

Hi,

On 2024/3/7 14:46, Baokun Li wrote:
> On 2024/3/7 11:41, Jingbo Xu wrote:
>> Hi Baokun,
>>

..

> Thank you for your feedback!
>
> If I understand you correctly, you mean to remove erofs_pseudo_mnt
> directly to avoid this false positive, and use anon_inode_create_getfile()
> to create the required anonymous inode.
As Al pointed out, you could just follow his ideas
since it's mainly a VFS POV...

Thanks,
Gao Xiang

>

2024-03-07 07:07:13

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On 2024/3/7 13:07, Al Viro wrote:
> On Thu, Mar 07, 2024 at 10:44:59AM +0800, Baokun Li wrote:
>
>> +static int erofs_anon_init_fs_context(struct fs_context *fc)
>> +{
>> + fc->ops = &erofs_anon_context_ops;
>> + return 0;
>> +}
>
> ITYM
> struct pseudo_fs_context *ctx = init_pseudo(fc, EROFS_SUPER_MAGIC);
> return ctx ? 0 : -ENOMEM;
>
> and to hell with erofs_anon_context_ops, along with its fill_super, calls
> of simple_fill_super(), etc. Unless I'm missing something, you are not
> even creating dentries here, let alone making them possible to look up.
Totally agree! It does streamline a lot!
>> +static void erofs_kill_pseudo_sb(struct super_block *sb)
>> +{
>> + kill_anon_super(sb);
>> +}
> *blink*
>
> What's wrong with simply using kill_anon_super as ->kill_sb?
Sure, I'll just use kill_anon_super().
>> +int erofs_anon_register_fs(void)
>> +{
>> + return register_filesystem(&erofs_anon_fs_type);
>> +}
> What for? The only thing it gives you is an ability to look it up by
> name. Which is completely pointless, IMO,
The helper function here is to avoid extern erofs_anon_fs_type(), because
we define it in fscache.c, but also use it in super.c. Moreover, we
don't need
to register it when CONFIG_EROFS_FS_ONDEMAND is not enabled, so we
also use this helper function for code isolation.
>
>> if (!erofs_pseudo_mnt) {
>> - struct vfsmount *mnt = kern_mount(&erofs_fs_type);
>> + struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
> ... since you are getting to it by direct reference to file_system_type
> anyway. Same unregistering, of course...
Thanks for having a look!
--
With Best Regards,
Baokun Li
.

2024-03-07 07:21:51

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On Thu, Mar 07, 2024 at 03:06:49PM +0800, Baokun Li wrote:
> > > +int erofs_anon_register_fs(void)
> > > +{
> > > + return register_filesystem(&erofs_anon_fs_type);
> > > +}
> > What for? The only thing it gives you is an ability to look it up by
> > name. Which is completely pointless, IMO,
> The helper function here is to avoid extern erofs_anon_fs_type(), because
> we define it in fscache.c, but also use it in super.c. Moreover, we don't
> need
> to register it when CONFIG_EROFS_FS_ONDEMAND is not enabled, so we

You don't need to register it at all.

The one and only effect of register_filesystem() is making file_system_type
instance visible to get_fs_type() (and making it show up in /proc/filesystems).

That's it. If you want to have it looked up by name (e.g. for userland
mounts), you need to register. If not, you do not need to do that.

Note that kern_mount() take a pointer to struct file_system_type,
not its (string) name. So all you get from registration is an extra line
in /proc/filesystems. What's the point?

2024-03-07 07:43:56

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On 2024/3/7 15:21, Al Viro wrote:
> On Thu, Mar 07, 2024 at 03:06:49PM +0800, Baokun Li wrote:
>>>> +int erofs_anon_register_fs(void)
>>>> +{
>>>> + return register_filesystem(&erofs_anon_fs_type);
>>>> +}
>>> What for? The only thing it gives you is an ability to look it up by
>>> name. Which is completely pointless, IMO,
>> The helper function here is to avoid extern erofs_anon_fs_type(), because
>> we define it in fscache.c, but also use it in super.c. Moreover, we don't
>> need
>> to register it when CONFIG_EROFS_FS_ONDEMAND is not enabled, so we
> You don't need to register it at all.
>
> The one and only effect of register_filesystem() is making file_system_type
> instance visible to get_fs_type() (and making it show up in /proc/filesystems).
>
> That's it. If you want to have it looked up by name (e.g. for userland
> mounts), you need to register. If not, you do not need to do that.
>
> Note that kern_mount() take a pointer to struct file_system_type,
> not its (string) name. So all you get from registration is an extra line
> in /proc/filesystems. What's the point?
It's dawning on me, thank you so much for explaining! ღ( ´・ᴗ・` )

--
With Best Regards,
Baokun Li
.

2024-03-07 09:08:40

by Baokun Li

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On 2024/3/7 16:46, Al Viro wrote:
> On Thu, Mar 07, 2024 at 07:21:12AM +0000, Al Viro wrote:
>> On Thu, Mar 07, 2024 at 03:06:49PM +0800, Baokun Li wrote:
>>>>> +int erofs_anon_register_fs(void)
>>>>> +{
>>>>> + return register_filesystem(&erofs_anon_fs_type);
>>>>> +}
>>>> What for? The only thing it gives you is an ability to look it up by
>>>> name. Which is completely pointless, IMO,
>>> The helper function here is to avoid extern erofs_anon_fs_type(), because
>>> we define it in fscache.c, but also use it in super.c. Moreover, we don't
>>> need
>>> to register it when CONFIG_EROFS_FS_ONDEMAND is not enabled, so we
>> You don't need to register it at all.
>>
>> The one and only effect of register_filesystem() is making file_system_type
>> instance visible to get_fs_type() (and making it show up in /proc/filesystems).
>>
>> That's it. If you want to have it looked up by name (e.g. for userland
>> mounts), you need to register. If not, you do not need to do that.
>>
>> Note that kern_mount() take a pointer to struct file_system_type,
>> not its (string) name. So all you get from registration is an extra line
>> in /proc/filesystems. What's the point?
> PS: at one point I considered renaming it to something that would sound
> less vague, but the best variant I'd been able to come up with was
> "publish_filesystem()", which is not much better and has an extra problem -
> how do you describe the reverse of that? "withdraw_filesystem()"?
> Decided that it wasn't worth the amount of noise and headache...
I feel the emphasis on "fs_name" rather than "filesystem" is less
likely to be misunderstood. What do you think about renaming
to add_fs_name/remove_fs_name?

--
With Best Regards,
Baokun Li
.

2024-03-07 09:19:35

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On Thu, Mar 07, 2024 at 12:18:52PM +0800, Gao Xiang wrote:
> Hi,
>
> (try to +Cc Christian and Al here...)
>
> On 2024/3/7 11:41, Jingbo Xu wrote:
> > Hi Baokun,
> >
> > Thanks for catching this!
> >
> >
> > On 3/7/24 10:52 AM, Gao Xiang wrote:
> > > Hi Baokun,
> > >
> > > On 2024/3/7 10:44, Baokun Li wrote:
> > > > Lockdep reported the following issue when mounting erofs with a
> > > > domain_id:
> > > >
> > > > ============================================
> > > > WARNING: possible recursive locking detected
> > > > 6.8.0-rc7-xfstests #521 Not tainted
> > > > --------------------------------------------
> > > > mount/396 is trying to acquire lock:
> > > > ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
> > > >                         at: alloc_super+0xe3/0x3d0
> > > >
> > > > but task is already holding lock:
> > > > ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
> > > >                         at: alloc_super+0xe3/0x3d0
> > > >
> > > > other info that might help us debug this:
> > > >   Possible unsafe locking scenario:
> > > >
> > > >         CPU0
> > > >         ----
> > > >    lock(&type->s_umount_key#50/1);
> > > >    lock(&type->s_umount_key#50/1);
> > > >
> > > >   *** DEADLOCK ***
> > > >
> > > >   May be due to missing lock nesting notation
> > > >
> > > > 2 locks held by mount/396:
> > > >   #0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
> > > >             at: alloc_super+0xe3/0x3d0
> > > >   #1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
> > > >             at: erofs_fscache_register_fs+0x3d/0x270 [erofs]
> > > >
> > > > stack backtrace:
> > > > CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
> > > > Call Trace:
> > > >   <TASK>
> > > >   dump_stack_lvl+0x64/0xb0
> > > >   validate_chain+0x5c4/0xa00
> > > >   __lock_acquire+0x6a9/0xd50
> > > >   lock_acquire+0xcd/0x2b0
> > > >   down_write_nested+0x45/0xd0
> > > >   alloc_super+0xe3/0x3d0
> > > >   sget_fc+0x62/0x2f0
> > > >   vfs_get_super+0x21/0x90
> > > >   vfs_get_tree+0x2c/0xf0
> > > >   fc_mount+0x12/0x40
> > > >   vfs_kern_mount.part.0+0x75/0x90
> > > >   kern_mount+0x24/0x40
> > > >   erofs_fscache_register_fs+0x1ef/0x270 [erofs]
> > > >   erofs_fc_fill_super+0x213/0x380 [erofs]
> > > >
> > > > This is because the file_system_type of both erofs and the pseudo-mount
> > > > point of domain_id is erofs_fs_type, so two successive calls to
> > > > alloc_super() are considered to be using the same lock and trigger the
> > > > warning above.
> > > >
> > > > Therefore add a nodev file_system_type named erofs_anon_fs_type to
> > > > silence this complaint. In addition, to reduce code coupling, refactor
> > > > out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb() functions
> > > > and move the erofs_pseudo_mnt related code to fscache.c.
> > > >
> > > > Signed-off-by: Baokun Li <[email protected]>
> > >
> > > IMHO, in the beginning, I'd like to avoid introducing another fs type
> > > for erofs to share (meta)data between filesystems since it will cause
> > > churn, could we use some alternative way to resolve this?
> >
> > Yeah as Gao Xiang said, this is initially intended to avoid introducing
> > anothoer file_system_type, say erofs_anon_fs_type.
> >
> > What we need is actually a method of allocating anonymous inode as a
> > sentinel identifying each blob. There is indeed a global mount, i.e.
> > anon_inode_mnt, for allocating anonymous inode/file specifically. At
> > the time the share domain feature is introduced, there's only one
> > anonymous inode, i.e. anon_inode_inode, and all the allocated anonymous
> > files are bound to this single anon_inode_inode. Thus we decided to
> > implement a erofs internal pseudo mount for this usage.
> >
> > But I noticed that we can now allocate unique anonymous inodes from
> > anon_inode_mnt since commit e7e832c ("fs: add LSM-supporting anon-inode
> > interface"), though the new interface is initially for LSM usage.
>
> Yes, as summary, EROFS now maintains a bunch of anon inodes among
> all different filesystem instances, so that like
>
> blob sharing or
> page cache sharing across filesystems can be done.
>
> In brief, I think the following patch is a good idea but it
> hasn't been landed until now:
> https://lore.kernel.org/r/[email protected]
>
> Other than that, is it a good idea to introduce another fs type
> (like erofs_anon_fs_type) for such usage?

It depends. If you're allocating a lot of inodes then having a separate
filesystem type for erofs makes sense. If it's just a few then it
probably doesn't matter. If you need custom inode operations for these
anonymous inodes then it also makes sense to have a separate filesystem
type.

2024-03-07 09:21:37

by Gao Xiang

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

Hi Christian,

On 2024/3/7 17:17, Christian Brauner wrote:
> On Thu, Mar 07, 2024 at 12:18:52PM +0800, Gao Xiang wrote:
>> Hi,
>>
>> (try to +Cc Christian and Al here...)
>>
>> On 2024/3/7 11:41, Jingbo Xu wrote:
>>> Hi Baokun,
>>>
>>> Thanks for catching this!
>>>
>>>
>>> On 3/7/24 10:52 AM, Gao Xiang wrote:
>>>> Hi Baokun,
>>>>
>>>> On 2024/3/7 10:44, Baokun Li wrote:
>>>>> Lockdep reported the following issue when mounting erofs with a
>>>>> domain_id:
>>>>>
>>>>> ============================================
>>>>> WARNING: possible recursive locking detected
>>>>> 6.8.0-rc7-xfstests #521 Not tainted
>>>>> --------------------------------------------
>>>>> mount/396 is trying to acquire lock:
>>>>> ffff907a8aaaa0e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>>>                         at: alloc_super+0xe3/0x3d0
>>>>>
>>>>> but task is already holding lock:
>>>>> ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>>>                         at: alloc_super+0xe3/0x3d0
>>>>>
>>>>> other info that might help us debug this:
>>>>>   Possible unsafe locking scenario:
>>>>>
>>>>>         CPU0
>>>>>         ----
>>>>>    lock(&type->s_umount_key#50/1);
>>>>>    lock(&type->s_umount_key#50/1);
>>>>>
>>>>>   *** DEADLOCK ***
>>>>>
>>>>>   May be due to missing lock nesting notation
>>>>>
>>>>> 2 locks held by mount/396:
>>>>>   #0: ffff907a8aaa90e0 (&type->s_umount_key#50/1){+.+.}-{3:3},
>>>>>             at: alloc_super+0xe3/0x3d0
>>>>>   #1: ffffffffc00e6f28 (erofs_domain_list_lock){+.+.}-{3:3},
>>>>>             at: erofs_fscache_register_fs+0x3d/0x270 [erofs]
>>>>>
>>>>> stack backtrace:
>>>>> CPU: 1 PID: 396 Comm: mount Not tainted 6.8.0-rc7-xfstests #521
>>>>> Call Trace:
>>>>>   <TASK>
>>>>>   dump_stack_lvl+0x64/0xb0
>>>>>   validate_chain+0x5c4/0xa00
>>>>>   __lock_acquire+0x6a9/0xd50
>>>>>   lock_acquire+0xcd/0x2b0
>>>>>   down_write_nested+0x45/0xd0
>>>>>   alloc_super+0xe3/0x3d0
>>>>>   sget_fc+0x62/0x2f0
>>>>>   vfs_get_super+0x21/0x90
>>>>>   vfs_get_tree+0x2c/0xf0
>>>>>   fc_mount+0x12/0x40
>>>>>   vfs_kern_mount.part.0+0x75/0x90
>>>>>   kern_mount+0x24/0x40
>>>>>   erofs_fscache_register_fs+0x1ef/0x270 [erofs]
>>>>>   erofs_fc_fill_super+0x213/0x380 [erofs]
>>>>>
>>>>> This is because the file_system_type of both erofs and the pseudo-mount
>>>>> point of domain_id is erofs_fs_type, so two successive calls to
>>>>> alloc_super() are considered to be using the same lock and trigger the
>>>>> warning above.
>>>>>
>>>>> Therefore add a nodev file_system_type named erofs_anon_fs_type to
>>>>> silence this complaint. In addition, to reduce code coupling, refactor
>>>>> out the erofs_anon_init_fs_context() and erofs_kill_pseudo_sb() functions
>>>>> and move the erofs_pseudo_mnt related code to fscache.c.
>>>>>
>>>>> Signed-off-by: Baokun Li <[email protected]>
>>>>
>>>> IMHO, in the beginning, I'd like to avoid introducing another fs type
>>>> for erofs to share (meta)data between filesystems since it will cause
>>>> churn, could we use some alternative way to resolve this?
>>>
>>> Yeah as Gao Xiang said, this is initially intended to avoid introducing
>>> anothoer file_system_type, say erofs_anon_fs_type.
>>>
>>> What we need is actually a method of allocating anonymous inode as a
>>> sentinel identifying each blob. There is indeed a global mount, i.e.
>>> anon_inode_mnt, for allocating anonymous inode/file specifically. At
>>> the time the share domain feature is introduced, there's only one
>>> anonymous inode, i.e. anon_inode_inode, and all the allocated anonymous
>>> files are bound to this single anon_inode_inode. Thus we decided to
>>> implement a erofs internal pseudo mount for this usage.
>>>
>>> But I noticed that we can now allocate unique anonymous inodes from
>>> anon_inode_mnt since commit e7e832c ("fs: add LSM-supporting anon-inode
>>> interface"), though the new interface is initially for LSM usage.
>>
>> Yes, as summary, EROFS now maintains a bunch of anon inodes among
>> all different filesystem instances, so that like
>>
>> blob sharing or
>> page cache sharing across filesystems can be done.
>>
>> In brief, I think the following patch is a good idea but it
>> hasn't been landed until now:
>> https://lore.kernel.org/r/[email protected]
>>
>> Other than that, is it a good idea to introduce another fs type
>> (like erofs_anon_fs_type) for such usage?
>
> It depends. If you're allocating a lot of inodes then having a separate
> filesystem type for erofs makes sense. If it's just a few then it
> probably doesn't matter. If you need custom inode operations for these
> anonymous inodes then it also makes sense to have a separate filesystem
> type.

Yeah, I think some time this year we will finish a formal
page cache sharing design and implementation for both bdev
and fscache mode.

So a separate filesystem type seems more reasonable in the
future, thanks for your confirmation!

Thanks,
Gao Xiang

2024-03-07 09:49:48

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] erofs: fix lockdep false positives on initializing erofs_pseudo_mnt

On Thu, Mar 07, 2024 at 07:21:12AM +0000, Al Viro wrote:
> On Thu, Mar 07, 2024 at 03:06:49PM +0800, Baokun Li wrote:
> > > > +int erofs_anon_register_fs(void)
> > > > +{
> > > > + return register_filesystem(&erofs_anon_fs_type);
> > > > +}
> > > What for? The only thing it gives you is an ability to look it up by
> > > name. Which is completely pointless, IMO,
> > The helper function here is to avoid extern erofs_anon_fs_type(), because
> > we define it in fscache.c, but also use it in super.c. Moreover, we don't
> > need
> > to register it when CONFIG_EROFS_FS_ONDEMAND is not enabled, so we
>
> You don't need to register it at all.
>
> The one and only effect of register_filesystem() is making file_system_type
> instance visible to get_fs_type() (and making it show up in /proc/filesystems).
>
> That's it. If you want to have it looked up by name (e.g. for userland
> mounts), you need to register. If not, you do not need to do that.
>
> Note that kern_mount() take a pointer to struct file_system_type,
> not its (string) name. So all you get from registration is an extra line
> in /proc/filesystems. What's the point?

PS: at one point I considered renaming it to something that would sound
less vague, but the best variant I'd been able to come up with was
"publish_filesystem()", which is not much better and has an extra problem -
how do you describe the reverse of that? "withdraw_filesystem()"?
Decided that it wasn't worth the amount of noise and headache...