2022-09-25 14:33:10

by Shiyang Ruan

[permalink] [raw]
Subject: [PATCH v9 0/3] mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

Changes since v8:
1. P2: rename drop_pagecache_sb() to super_drop_pagecache().
2. P2: let super_drop_pagecache() accept invalidate method.
3. P3: invalidate all dax mappings by invalidate_inode_pages2().
4. P3: shutdown the filesystem when it is to be removed.
5. Rebase on 6.0-rc6 + Darrick's patch[1] + Dan's patch[2].

[1]: https://lore.kernel.org/linux-xfs/Yv5wIa2crHioYeRr@magnolia/
[2]: https://lore.kernel.org/linux-xfs/166153426798.2758201.15108211981034512993.stgit@dwillia2-xfh.jf.intel.com/

Shiyang Ruan (3):
xfs: fix the calculation of length and end
fs: move drop_pagecache_sb() for others to use
mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

drivers/dax/super.c | 3 ++-
fs/drop_caches.c | 35 ++----------------------------
fs/super.c | 43 +++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_notify_failure.c | 36 ++++++++++++++++++++++++++-----
include/linux/fs.h | 1 +
include/linux/mm.h | 1 +
include/linux/pagemap.h | 1 +
mm/truncate.c | 20 +++++++++++++++--
8 files changed, 99 insertions(+), 41 deletions(-)

--
2.37.3


2022-09-25 14:33:18

by Shiyang Ruan

[permalink] [raw]
Subject: [PATCH 2/3] fs: move drop_pagecache_sb() for others to use

xfs_notify_failure.c requires a method to invalidate all dax mappings.
drop_pagecache_sb() can do this, but it is a static function and is only
built with CONFIG_SYSCTL. Move it to super.c and make it available to
other callers, and use its second argument to choose which invalidation
method to use.

Signed-off-by: Shiyang Ruan <[email protected]>
---
fs/drop_caches.c | 35 ++-------------------------------
fs/super.c | 43 +++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 1 +
include/linux/pagemap.h | 1 +
mm/truncate.c | 20 +++++++++++++++++--
5 files changed, 65 insertions(+), 35 deletions(-)

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index e619c31b6bd9..4c9281885077 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -15,38 +15,6 @@
/* A global variable is a bit ugly, but it keeps the code simple */
int sysctl_drop_caches;

-static void drop_pagecache_sb(struct super_block *sb, void *unused)
-{
- struct inode *inode, *toput_inode = NULL;
-
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- spin_lock(&inode->i_lock);
- /*
- * We must skip inodes in unusual state. We may also skip
- * inodes without pages but we deliberately won't in case
- * we need to reschedule to avoid softlockups.
- */
- if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
- (mapping_empty(inode->i_mapping) && !need_resched())) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
-
- invalidate_mapping_pages(inode->i_mapping, 0, -1);
- iput(toput_inode);
- toput_inode = inode;
-
- cond_resched();
- spin_lock(&sb->s_inode_list_lock);
- }
- spin_unlock(&sb->s_inode_list_lock);
- iput(toput_inode);
-}
-
int drop_caches_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
@@ -59,7 +27,8 @@ int drop_caches_sysctl_handler(struct ctl_table *table, int write,
static int stfu;

if (sysctl_drop_caches & 1) {
- iterate_supers(drop_pagecache_sb, NULL);
+ iterate_supers(super_drop_pagecache,
+ invalidate_inode_pages);
count_vm_event(DROP_PAGECACHE);
}
if (sysctl_drop_caches & 2) {
diff --git a/fs/super.c b/fs/super.c
index 734ed584a946..7cdbf146bc31 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -36,6 +36,7 @@
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include <linux/fs_context.h>
+#include <linux/pagemap.h>
#include <uapi/linux/mount.h>
#include "internal.h"

@@ -677,6 +678,48 @@ void drop_super_exclusive(struct super_block *sb)
}
EXPORT_SYMBOL(drop_super_exclusive);

+/*
+ * super_drop_pagecache - drop all page caches of a filesystem
+ * @sb: superblock to invalidate
+ * @arg: invalidate method, such as invalidate_inode_pages(),
+ * invalidate_inode_pages2()
+ *
+ * Scans the inodes of a filesystem and drops all of their page caches.
+ */
+void super_drop_pagecache(struct super_block *sb, void *arg)
+{
+ struct inode *inode, *toput_inode = NULL;
+ int (*invalidator)(struct address_space *) = arg;
+
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ spin_lock(&inode->i_lock);
+ /*
+ * We must skip inodes in unusual state. We may also skip
+ * inodes without pages but we deliberately won't in case
+ * we need to reschedule to avoid softlockups.
+ */
+ if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+ (mapping_empty(inode->i_mapping) && !need_resched())) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&sb->s_inode_list_lock);
+
+ invalidator(inode->i_mapping);
+ iput(toput_inode);
+ toput_inode = inode;
+
+ cond_resched();
+ spin_lock(&sb->s_inode_list_lock);
+ }
+ spin_unlock(&sb->s_inode_list_lock);
+ iput(toput_inode);
+}
+EXPORT_SYMBOL(super_drop_pagecache);
+
static void __iterate_supers(void (*f)(struct super_block *))
{
struct super_block *sb, *p = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9eced4cc286e..0e60c494688e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3292,6 +3292,7 @@ extern struct super_block *get_super(struct block_device *);
extern struct super_block *get_active_super(struct block_device *bdev);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
+void super_drop_pagecache(struct super_block *sb, void *unused);
extern void iterate_supers(void (*)(struct super_block *, void *), void *);
extern void iterate_supers_type(struct file_system_type *,
void (*)(struct super_block *, void *), void *);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0178b2040ea3..8879c141b117 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -27,6 +27,7 @@ static inline void invalidate_remote_inode(struct inode *inode)
S_ISLNK(inode->i_mode))
invalidate_mapping_pages(inode->i_mapping, 0, -1);
}
+int invalidate_inode_pages(struct address_space *mapping);
int invalidate_inode_pages2(struct address_space *mapping);
int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
diff --git a/mm/truncate.c b/mm/truncate.c
index 0b0708bf935f..3016258d41e7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -548,12 +548,13 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
}

/**
- * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode
+ * invalidate_mapping_pages - Invalidate range of clean, unlocked cache of one
+ * inode
* @mapping: the address_space which holds the cache to invalidate
* @start: the offset 'from' which to invalidate
* @end: the offset 'to' which to invalidate (inclusive)
*
- * This function removes pages that are clean, unmapped and unlocked,
+ * This function removes range of pages that are clean, unmapped and unlocked,
* as well as shadow entries. It will not block on IO activity.
*
* If you want to remove all the pages of one inode, regardless of
@@ -568,6 +569,21 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
}
EXPORT_SYMBOL(invalidate_mapping_pages);

+/**
+ * invalidate_inode_pages - Invalidate all clean, unlocked cache of one inode
+ * @mapping: the address_space which holds the cache to invalidate
+ *
+ * This function removes all pages that are clean, unmapped and unlocked,
+ * as well as shadow entries. It will not block on IO activity.
+ */
+int invalidate_inode_pages(struct address_space *mapping)
+{
+ invalidate_mapping_pages(mapping, 0, -1);
+
+ return 0;
+}
+EXPORT_SYMBOL(invalidate_inode_pages);
+
/*
* This is like invalidate_inode_page(), except it ignores the page's
* refcount. We do this because invalidate_inode_pages2() needs stronger
--
2.37.3

2022-09-25 14:34:06

by Shiyang Ruan

[permalink] [raw]
Subject: [PATCH 1/3] xfs: fix the calculation of length and end

The end should be start + length - 1. Also fix the calculation of the
length when computing the intersection of the notify range and the device.

Signed-off-by: Shiyang Ruan <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
---
fs/xfs/xfs_notify_failure.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index c4078d0ec108..3830f908e215 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -114,7 +114,7 @@ xfs_dax_notify_ddev_failure(
int error = 0;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr);
xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
- xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen);
+ xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);

error = xfs_trans_alloc_empty(mp, &tp);
@@ -210,7 +210,7 @@ xfs_dax_notify_failure(
ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;

/* Ignore the range out of filesystem area */
- if (offset + len < ddev_start)
+ if (offset + len - 1 < ddev_start)
return -ENXIO;
if (offset > ddev_end)
return -ENXIO;
@@ -222,8 +222,8 @@ xfs_dax_notify_failure(
len -= ddev_start - offset;
offset = 0;
}
- if (offset + len > ddev_end)
- len -= ddev_end - offset;
+ if (offset + len - 1 > ddev_end)
+ len -= offset + len - 1 - ddev_end;

return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
mf_flags);
--
2.37.3

2022-09-30 04:25:53

by Shiyang Ruan

[permalink] [raw]
Subject: Re: [PATCH v9 0/3] mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

Hi,

Ping

在 2022/9/25 21:33, Shiyang Ruan 写道:
> Changes since v8:
> 1. P2: rename drop_pagecache_sb() to super_drop_pagecache().
> 2. P2: let super_drop_pagecache() accept invalidate method.
> 3. P3: invalidate all dax mappings by invalidate_inode_pages2().
> 4. P3: shutdown the filesystem when it is to be removed.
> 5. Rebase on 6.0-rc6 + Darrick's patch[1] + Dan's patch[2].
>
> [1]: https://lore.kernel.org/linux-xfs/Yv5wIa2crHioYeRr@magnolia/
> [2]: https://lore.kernel.org/linux-xfs/166153426798.2758201.15108211981034512993.stgit@dwillia2-xfh.jf.intel.com/
>
> Shiyang Ruan (3):
> xfs: fix the calculation of length and end
> fs: move drop_pagecache_sb() for others to use
> mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind
>
> drivers/dax/super.c | 3 ++-
> fs/drop_caches.c | 35 ++----------------------------
> fs/super.c | 43 +++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_notify_failure.c | 36 ++++++++++++++++++++++++++-----
> include/linux/fs.h | 1 +
> include/linux/mm.h | 1 +
> include/linux/pagemap.h | 1 +
> mm/truncate.c | 20 +++++++++++++++--
> 8 files changed, 99 insertions(+), 41 deletions(-)
>

2022-10-13 10:41:37

by Shiyang Ruan

[permalink] [raw]
Subject: Re: [PATCH v9 0/3] mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

Ping again~

在 2022/9/30 11:28, Shiyang Ruan 写道:
> Hi,
>
> Ping
>
> 在 2022/9/25 21:33, Shiyang Ruan 写道:
>> Changes since v8:
>>    1. P2: rename drop_pagecache_sb() to super_drop_pagecache().
>>    2. P2: let super_drop_pagecache() accept invalidate method.
>>    3. P3: invalidate all dax mappings by invalidate_inode_pages2().
>>    4. P3: shutdown the filesystem when it is to be removed.
>>    5. Rebase on 6.0-rc6 + Darrick's patch[1] + Dan's patch[2].
>>
>> [1]: https://lore.kernel.org/linux-xfs/Yv5wIa2crHioYeRr@magnolia/
>> [2]:
>> https://lore.kernel.org/linux-xfs/166153426798.2758201.15108211981034512993.stgit@dwillia2-xfh.jf.intel.com/
>>
>> Shiyang Ruan (3):
>>    xfs: fix the calculation of length and end
>>    fs: move drop_pagecache_sb() for others to use
>>    mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind
>>
>>   drivers/dax/super.c         |  3 ++-
>>   fs/drop_caches.c            | 35 ++----------------------------
>>   fs/super.c                  | 43 +++++++++++++++++++++++++++++++++++++
>>   fs/xfs/xfs_notify_failure.c | 36 ++++++++++++++++++++++++++-----
>>   include/linux/fs.h          |  1 +
>>   include/linux/mm.h          |  1 +
>>   include/linux/pagemap.h     |  1 +
>>   mm/truncate.c               | 20 +++++++++++++++--
>>   8 files changed, 99 insertions(+), 41 deletions(-)
>>