2022-05-30 13:37:18

by Muchun Song

[permalink] [raw]
Subject: [PATCH v5 06/11] mm: thp: make split queue lock safe when LRU pages are reparented

Similar to the lruvec lock, we use the same approach to make the split
queue lock safe when LRU pages are reparented.

Signed-off-by: Muchun Song <[email protected]>
Acked-by: Roman Gushchin <[email protected]>
---
include/linux/memcontrol.h | 10 ++++
mm/huge_memory.c | 116 +++++++++++++++++++++++++++++++++++----------
2 files changed, 100 insertions(+), 26 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e390aaa46776..56227603dcb8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1650,6 +1650,11 @@ int alloc_shrinker_info(struct mem_cgroup *memcg);
void free_shrinker_info(struct mem_cgroup *memcg);
void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
void reparent_shrinker_deferred(struct mem_cgroup *memcg);
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return shrinker->id;
+}
#else
#define mem_cgroup_sockets_enabled 0
static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
@@ -1663,6 +1668,11 @@ static inline void set_shrinker_bit(struct mem_cgroup *memcg,
int nid, int shrinker_id)
{
}
+
+static inline int shrinker_id(struct shrinker *shrinker)
+{
+ return -1;
+}
#endif

#ifdef CONFIG_MEMCG_KMEM
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b17b9d25d045..d3411dc291ab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -503,25 +503,90 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
}

#ifdef CONFIG_MEMCG
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct mem_cgroup *memcg = page_memcg(compound_head(page));
- struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+ if (mem_cgroup_disabled())
+ return NULL;
+ if (&NODE_DATA(folio_nid(folio))->deferred_split_queue == queue)
+ return NULL;
+ return container_of(queue, struct mem_cgroup, deferred_split_queue);
+}

- if (memcg)
- return &memcg->deferred_split_queue;
- else
- return &pgdat->deferred_split_queue;
+static inline struct deferred_split *folio_memcg_split_queue(struct folio *folio)
+{
+ struct mem_cgroup *memcg = folio_memcg(folio);
+
+ return memcg ? &memcg->deferred_split_queue : NULL;
}
#else
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline struct mem_cgroup *folio_split_queue_memcg(struct folio *folio,
+ struct deferred_split *queue)
{
- struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+ return NULL;
+}

- return &pgdat->deferred_split_queue;
+static inline struct deferred_split *folio_memcg_split_queue(struct folio *folio)
+{
+ return NULL;
}
#endif

+static struct deferred_split *folio_split_queue(struct folio *folio)
+{
+ struct deferred_split *queue = folio_memcg_split_queue(folio);
+
+ return queue ? : &NODE_DATA(folio_nid(folio))->deferred_split_queue;
+}
+
+static struct deferred_split *folio_split_queue_lock(struct folio *folio)
+{
+ struct deferred_split *queue;
+
+ rcu_read_lock();
+retry:
+ queue = folio_split_queue(folio);
+ spin_lock(&queue->split_queue_lock);
+
+ if (unlikely(folio_split_queue_memcg(folio, queue) != folio_memcg(folio))) {
+ spin_unlock(&queue->split_queue_lock);
+ goto retry;
+ }
+ rcu_read_unlock();
+
+ return queue;
+}
+
+static struct deferred_split *
+folio_split_queue_lock_irqsave(struct folio *folio, unsigned long *flags)
+{
+ struct deferred_split *queue;
+
+ rcu_read_lock();
+retry:
+ queue = folio_split_queue(folio);
+ spin_lock_irqsave(&queue->split_queue_lock, *flags);
+
+ if (unlikely(folio_split_queue_memcg(folio, queue) != folio_memcg(folio))) {
+ spin_unlock_irqrestore(&queue->split_queue_lock, *flags);
+ goto retry;
+ }
+ rcu_read_unlock();
+
+ return queue;
+}
+
+static inline void split_queue_unlock(struct deferred_split *queue)
+{
+ spin_unlock(&queue->split_queue_lock);
+}
+
+static inline void split_queue_unlock_irqrestore(struct deferred_split *queue,
+ unsigned long flags)
+{
+ spin_unlock_irqrestore(&queue->split_queue_lock, flags);
+}
+
void prep_transhuge_page(struct page *page)
{
/*
@@ -2489,7 +2554,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
{
struct folio *folio = page_folio(page);
struct page *head = &folio->page;
- struct deferred_split *ds_queue = get_deferred_split_queue(head);
+ struct deferred_split *ds_queue;
XA_STATE(xas, &head->mapping->i_pages, head->index);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
@@ -2581,13 +2646,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}

/* Prevent deferred_split_scan() touching ->_refcount */
- spin_lock(&ds_queue->split_queue_lock);
+ ds_queue = folio_split_queue_lock(folio);
if (page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
}
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
if (mapping) {
int nr = thp_nr_pages(head);

@@ -2605,7 +2670,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
__split_huge_page(page, list, end);
ret = 0;
} else {
- spin_unlock(&ds_queue->split_queue_lock);
+ split_queue_unlock(ds_queue);
fail:
if (mapping)
xas_unlock(&xas);
@@ -2630,25 +2695,23 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

void free_transhuge_page(struct page *page)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(page);
+ struct deferred_split *ds_queue;
unsigned long flags;

- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(page_folio(page), &flags);
if (!list_empty(page_deferred_list(page))) {
ds_queue->split_queue_len--;
list_del(page_deferred_list(page));
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
free_compound_page(page);
}

void deferred_split_huge_page(struct page *page)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(page);
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = page_memcg(compound_head(page));
-#endif
+ struct deferred_split *ds_queue;
unsigned long flags;
+ struct folio *folio = page_folio(page);

VM_BUG_ON_PAGE(!PageTransHuge(page), page);

@@ -2665,18 +2728,19 @@ void deferred_split_huge_page(struct page *page)
if (PageSwapCache(page))
return;

- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
if (list_empty(page_deferred_list(page))) {
+ struct mem_cgroup *memcg;
+
+ memcg = folio_split_queue_memcg(folio, ds_queue);
count_vm_event(THP_DEFERRED_SPLIT_PAGE);
list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
ds_queue->split_queue_len++;
-#ifdef CONFIG_MEMCG
if (memcg)
set_shrinker_bit(memcg, page_to_nid(page),
- deferred_split_shrinker.id);
-#endif
+ shrinker_id(&deferred_split_shrinker));
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ split_queue_unlock_irqrestore(ds_queue, flags);
}

static unsigned long deferred_split_count(struct shrinker *shrink,
--
2.11.0