2023-08-08 20:35:57

by Christoph Hellwig

[permalink] [raw]
Subject: s_fs_info and ->kill_sb revisited

Hi all,

this series is against the VFS vfs.super branch and does two slightly
related things:

- move closing of the external devices in ext4 and xfs from ->put_super
into ->kill_sb so that this isn't done under s_umount, which creates
a lock order reversal
- move freeing the private data in s_fs_info into ->kill_sb for file systems
that pass it in through the fs_context, as otherwise we could leak it
before fill_super is called (this is something new on the vfs.super
branch because of the changed place where blkdev_get is called)

Diffstat:
exfat/exfat_fs.h | 2 -
exfat/super.c | 39 +++++++++++++-------------
ext4/super.c | 50 +++++++++++++++++-----------------
ntfs3/super.c | 33 ++++++++++------------
xfs/xfs_buf.c | 7 +++-
xfs/xfs_super.c | 80 +++++++++++++++++++++++++------------------------------
6 files changed, 102 insertions(+), 109 deletions(-)


2023-08-08 20:35:58

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 06/13] xfs: close the RT and log block devices in xfs_free_buftarg

Closing the block devices logically belongs in xfs_free_buftarg, so instead
of open coding it in the caller, move it there and add a check for the s_bdev
so that the main device isn't closed, as that's done by the VFS helper.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/xfs/xfs_buf.c | 5 +++++
fs/xfs/xfs_super.c | 12 ++----------
2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 83b8702030f71d..c57e6e03dfa80c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1938,6 +1938,8 @@ void
xfs_free_buftarg(
struct xfs_buftarg *btp)
{
+ struct block_device *bdev = btp->bt_bdev;
+
unregister_shrinker(&btp->bt_shrinker);
ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
percpu_counter_destroy(&btp->bt_io_count);
@@ -1945,6 +1947,9 @@ xfs_free_buftarg(

blkdev_issue_flush(btp->bt_bdev);
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
+ /* the main block device is closed by kill_block_super */
+ if (bdev != btp->bt_mount->m_super->s_bdev)
+ blkdev_put(bdev, btp->bt_mount->m_super);

kmem_free(btp);
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f00d1162815d19..37b1b763a0bef0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -399,18 +399,10 @@ STATIC void
xfs_close_devices(
struct xfs_mount *mp)
{
- if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
- struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
xfs_free_buftarg(mp->m_logdev_targp);
- blkdev_put(logdev, mp->m_super);
- }
- if (mp->m_rtdev_targp) {
- struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-
+ if (mp->m_rtdev_targp)
xfs_free_buftarg(mp->m_rtdev_targp);
- blkdev_put(rtdev, mp->m_super);
- }
xfs_free_buftarg(mp->m_ddev_targp);
}

--
2.39.2


2023-08-08 20:37:08

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 09/13] exfat: don't RCU-free the sbi

There are no RCU critical sections for accessing any information in the
sbi, so drop the call_rcu indirection for freeing the sbi.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/exfat/exfat_fs.h | 2 --
fs/exfat/super.c | 15 ++++-----------
2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 729ada9e26e82e..f55498e5c23d46 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -273,8 +273,6 @@ struct exfat_sb_info {

spinlock_t inode_hash_lock;
struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
-
- struct rcu_head rcu;
};

#define EXFAT_CACHE_VALID 0
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8c32460e031e80..3c6aec96d0dc85 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -31,16 +31,6 @@ static void exfat_free_iocharset(struct exfat_sb_info *sbi)
kfree(sbi->options.iocharset);
}

-static void exfat_delayed_free(struct rcu_head *p)
-{
- struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);
-
- unload_nls(sbi->nls_io);
- exfat_free_iocharset(sbi);
- exfat_free_upcase_table(sbi);
- kfree(sbi);
-}
-
static void exfat_put_super(struct super_block *sb)
{
struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -50,7 +40,10 @@ static void exfat_put_super(struct super_block *sb)
brelse(sbi->boot_bh);
mutex_unlock(&sbi->s_lock);

- call_rcu(&sbi->rcu, exfat_delayed_free);
+ unload_nls(sbi->nls_io);
+ exfat_free_iocharset(sbi);
+ exfat_free_upcase_table(sbi);
+ kfree(sbi);
}

static int exfat_sync_fs(struct super_block *sb, int wait)
--
2.39.2


2023-08-08 21:04:47

by Christoph Hellwig

[permalink] [raw]
Subject: [PATCH 13/13] ntfs3: free the sbi in ->kill_sb

As a rule of thumb everything allocated to the fs_context and moved into
the super_block should be freed by ->kill_sb so that the teardown
handling doesn't need to be duplicated between the fill_super error
path and put_super. Implement an ntfs3-specific kill_sb method to do
that.

Signed-off-by: Christoph Hellwig <[email protected]>
---
fs/ntfs3/super.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 727138933a9324..5fffddea554f18 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -625,10 +625,6 @@ static void ntfs_put_super(struct super_block *sb)

/* Mark rw ntfs as clear, if possible. */
ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
-
- put_mount_options(sbi->options);
- ntfs3_free_sbi(sbi);
- sb->s_fs_info = NULL;
}

static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1562,15 +1558,7 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc)
put_inode_out:
iput(inode);
out:
- /*
- * Free resources here.
- * ntfs_fs_free will be called with fc->s_fs_info = NULL
- */
- put_mount_options(sbi->options);
- ntfs3_free_sbi(sbi);
- sb->s_fs_info = NULL;
kfree(boot2);
-
return err;
}

@@ -1726,13 +1714,24 @@ static int ntfs_init_fs_context(struct fs_context *fc)
return -ENOMEM;
}

+static void ntfs3_kill_sb(struct super_block *sb)
+{
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ kill_block_super(sb);
+
+ if (sbi->options)
+ put_mount_options(sbi->options);
+ ntfs3_free_sbi(sbi);
+}
+
// clang-format off
static struct file_system_type ntfs_fs_type = {
.owner = THIS_MODULE,
.name = "ntfs3",
.init_fs_context = ntfs_init_fs_context,
.parameters = ntfs_fs_parameters,
- .kill_sb = kill_block_super,
+ .kill_sb = ntfs3_kill_sb,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
// clang-format on
--
2.39.2


2023-08-09 08:25:34

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH 13/13] ntfs3: free the sbi in ->kill_sb

On Tue, Aug 08, 2023 at 09:16:00AM -0700, Christoph Hellwig wrote:
> As a rule of thumb everything allocated to the fs_context and moved into
> the super_block should be freed by ->kill_sb so that the teardown
> handling doesn't need to be duplicated between the fill_super error
> path and put_super. Implement an ntfs3-specific kill_sb method to do
> that.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---

Looks good to me,
Reviewed-by: Christian Brauner <[email protected]>

2023-08-09 16:22:41

by Darrick J. Wong

[permalink] [raw]
Subject: Re: [PATCH 06/13] xfs: close the RT and log block devices in xfs_free_buftarg

On Tue, Aug 08, 2023 at 09:15:53AM -0700, Christoph Hellwig wrote:
> Closing the block devices logically belongs in xfs_free_buftarg, so instead
> of open coding it in the caller, move it there and add a check for the s_bdev
> so that the main device isn't closed, as that's done by the VFS helper.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> fs/xfs/xfs_buf.c | 5 +++++
> fs/xfs/xfs_super.c | 12 ++----------
> 2 files changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 83b8702030f71d..c57e6e03dfa80c 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -1938,6 +1938,8 @@ void
> xfs_free_buftarg(
> struct xfs_buftarg *btp)
> {
> + struct block_device *bdev = btp->bt_bdev;
> +
> unregister_shrinker(&btp->bt_shrinker);
> ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
> percpu_counter_destroy(&btp->bt_io_count);
> @@ -1945,6 +1947,9 @@ xfs_free_buftarg(
>
> blkdev_issue_flush(btp->bt_bdev);
> fs_put_dax(btp->bt_daxdev, btp->bt_mount);
> + /* the main block device is closed by kill_block_super */
> + if (bdev != btp->bt_mount->m_super->s_bdev)
> + blkdev_put(bdev, btp->bt_mount->m_super);

Hmm... I feel like this would be cleaner if the data dev buftarg could
get its own refcount separate from super_block.s_bdev, but I looked
through the code and couldn't identify a simple way to do that. Soo...

Reviewed-by: Darrick J. Wong <[email protected]>

--D


>
> kmem_free(btp);
> }
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index f00d1162815d19..37b1b763a0bef0 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -399,18 +399,10 @@ STATIC void
> xfs_close_devices(
> struct xfs_mount *mp)
> {
> - if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
> - struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
> -
> + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
> xfs_free_buftarg(mp->m_logdev_targp);
> - blkdev_put(logdev, mp->m_super);
> - }
> - if (mp->m_rtdev_targp) {
> - struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
> -
> + if (mp->m_rtdev_targp)
> xfs_free_buftarg(mp->m_rtdev_targp);
> - blkdev_put(rtdev, mp->m_super);
> - }
> xfs_free_buftarg(mp->m_ddev_targp);
> }
>
> --
> 2.39.2
>

2023-08-09 16:38:47

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 06/13] xfs: close the RT and log block devices in xfs_free_buftarg

On Wed, Aug 09, 2023 at 08:45:32AM -0700, Darrick J. Wong wrote:
> > blkdev_issue_flush(btp->bt_bdev);
> > fs_put_dax(btp->bt_daxdev, btp->bt_mount);
> > + /* the main block device is closed by kill_block_super */
> > + if (bdev != btp->bt_mount->m_super->s_bdev)
> > + blkdev_put(bdev, btp->bt_mount->m_super);
>
> Hmm... I feel like this would be cleaner if the data dev buftarg could
> get its own refcount separate from super_block.s_bdev, but I looked
> through the code and couldn't identify a simple way to do that. Soo...

blkdev_put doesn't really drop a refcount, it closes the device.
It just happens to be misnamed, but Jan is looking into a series that
will as a side effect end up with a better name for this functionality.