2021-12-18 18:53:27

by Nikita Yushchenko

Subject: [PATCH/RFC v2 0/3] tlb: mmu_gather: use batched table free if possible

In mmu_gather code, the final table free in __tlb_remove_table_free()
runs a loop, calling the arch hook __tlb_remove_table() to free each
table individually.

Several architectures use free_page_and_swap_cache() as their
__tlb_remove_table() implementation. Calling it in a loop results in an
individual put_page() call for each page being freed.

This patchset refactors the code to issue a single release_pages() call
in that case instead. This is expected to perform better, especially
when memcg accounting is enabled.
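
For illustration, a simplified sketch of the difference (not the literal
patch, which follows in 3/3):

	/* Before: each table page is freed individually, one put_page() each */
	for (i = 0; i < batch->nr; i++)
		free_page_and_swap_cache((struct page *)batch->tables[i]);

	/* After: the whole batch is handed to release_pages() in one call */
	free_pages_and_swap_cache_nolru((struct page **)batch->tables, batch->nr);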

Nikita Yushchenko (3):
tlb: mmu_gather: introduce CONFIG_MMU_GATHER_TABLE_FREE_COMMON
mm/swap: introduce free_pages_and_swap_cache_nolru()
tlb: mmu_gather: use batched table free if possible

arch/Kconfig | 3 +++
arch/arm/Kconfig | 1 +
arch/arm/include/asm/tlb.h | 5 -----
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/tlb.h | 5 -----
arch/x86/Kconfig | 1 +
arch/x86/include/asm/tlb.h | 14 --------------
include/asm-generic/tlb.h | 5 +++++
include/linux/swap.h | 5 ++++-
mm/mmu_gather.c | 25 ++++++++++++++++++++++---
mm/swap_state.c | 29 ++++++++++++++++++++++-------
11 files changed, 59 insertions(+), 35 deletions(-)

--
2.30.2



2021-12-18 18:53:30

by Nikita Yushchenko

Subject: [PATCH/RFC v2 2/3] mm/swap: introduce free_pages_and_swap_cache_nolru()

This is a variant of free_pages_and_swap_cache() that does not call
lru_add_drain(), for better performance in the case when the passed
pages are guaranteed not to be on the LRU.
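
For context, a minimal usage sketch (hypothetical caller, not part of
this patch); the point is that pages such as page-table pages are never
put on the LRU, so the drain can be skipped:

	/* Pages that may be on the LRU: drain per-CPU pagevecs first */
	free_pages_and_swap_cache(pages, nr);

	/*
	 * Pages known to never have been on the LRU (e.g. page-table
	 * pages): skip the lru_add_drain() call entirely.
	 */
	free_pages_and_swap_cache_nolru(pages, nr);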

Signed-off-by: Nikita Yushchenko <[email protected]>
---
include/linux/swap.h | 5 ++++-
mm/swap_state.c | 29 ++++++++++++++++++++++-------
2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index d1ea44b31f19..86a1b0a61889 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -460,6 +460,7 @@ extern void clear_shadow_from_swap_cache(int type, unsigned long begin,
extern void free_swap_cache(struct page *);
extern void free_page_and_swap_cache(struct page *);
extern void free_pages_and_swap_cache(struct page **, int);
+extern void free_pages_and_swap_cache_nolru(struct page **, int);
extern struct page *lookup_swap_cache(swp_entry_t entry,
struct vm_area_struct *vma,
unsigned long addr);
@@ -565,7 +566,9 @@ static inline struct address_space *swap_address_space(swp_entry_t entry)
#define free_page_and_swap_cache(page) \
put_page(page)
#define free_pages_and_swap_cache(pages, nr) \
- release_pages((pages), (nr));
+ release_pages((pages), (nr))
+#define free_pages_and_swap_cache_nolru(pages, nr) \
+ release_pages((pages), (nr))

static inline void free_swap_cache(struct page *page)
{
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 8d4104242100..a5d9fd258f0a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -307,17 +307,32 @@ void free_page_and_swap_cache(struct page *page)

/*
* Passed an array of pages, drop them all from swapcache and then release
- * them. They are removed from the LRU and freed if this is their last use.
+ * them. They are optionally removed from the LRU and freed if this is their
+ * last use.
*/
-void free_pages_and_swap_cache(struct page **pages, int nr)
+static void __free_pages_and_swap_cache(struct page **pages, int nr,
+ bool do_lru)
{
- struct page **pagep = pages;
int i;

- lru_add_drain();
- for (i = 0; i < nr; i++)
- free_swap_cache(pagep[i]);
- release_pages(pagep, nr);
+ if (do_lru)
+ lru_add_drain();
+ for (i = 0; i < nr; i++) {
+ if (!do_lru)
+ VM_WARN_ON_ONCE_PAGE(PageLRU(pages[i]), pages[i]);
+ free_swap_cache(pages[i]);
+ }
+ release_pages(pages, nr);
+}
+
+void free_pages_and_swap_cache(struct page **pages, int nr)
+{
+ __free_pages_and_swap_cache(pages, nr, true);
+}
+
+void free_pages_and_swap_cache_nolru(struct page **pages, int nr)
+{
+ __free_pages_and_swap_cache(pages, nr, false);
}

static inline bool swap_use_vma_readahead(void)
--
2.30.2


2021-12-18 18:53:32

by Nikita Yushchenko

Subject: [PATCH/RFC v2 3/3] tlb: mmu_gather: use batched table free if possible

In the case when __tlb_remove_table() is implemented via
free_page_and_swap_cache(), use free_pages_and_swap_cache_nolru() for
batched table removal.

This enables a single release_pages() call instead of a loop calling
put_page(). This should perform better, especially when memcg
accounting is enabled.
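
For clarity, the resulting free path on such architectures looks roughly
like this (simplified call chain, not literal code):

	/*
	 * __tlb_remove_table_free(batch)
	 *   -> __tlb_remove_tables(batch->tables, batch->nr)
	 *     -> free_pages_and_swap_cache_nolru((struct page **)tables, nr)
	 *       -> free_swap_cache() per page, then a single release_pages()
	 */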

Signed-off-by: Nikita Yushchenko <[email protected]>
---
mm/mmu_gather.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index eb2f30a92462..2e75d396bbad 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -98,15 +98,24 @@ static inline void __tlb_remove_table(void *table)
{
free_page_and_swap_cache((struct page *)table);
}
-#endif

-static void __tlb_remove_table_free(struct mmu_table_batch *batch)
+static inline void __tlb_remove_tables(void **tables, int nr)
+{
+ free_pages_and_swap_cache_nolru((struct page **)tables, nr);
+}
+#else
+static inline void __tlb_remove_tables(void **tables, int nr)
{
int i;

- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
+ for (i = 0; i < nr; i++)
+ __tlb_remove_table(tables[i]);
+}
+#endif

+static void __tlb_remove_table_free(struct mmu_table_batch *batch)
+{
+ __tlb_remove_tables(batch->tables, batch->nr);
free_page((unsigned long)batch);
}

--
2.30.2