2020-08-24 12:57:20

by Alex Shi

Subject: [PATCH v18 21/32] mm/lru: introduce the relock_page_lruvec function

From: Alexander Duyck <[email protected]>

Use this new function to replace the same code repeated at several call
sites; no functional change.

When testing for relock, we can avoid the need for RCU locking if we simply
compare the page's pgdat and memcg pointers against those that the lruvec is
holding. By doing this we can avoid the extra pointer walks and accesses of
the memory cgroup.

In addition, we can avoid the checks entirely if lruvec is currently NULL.
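
For illustration, a typical pagevec walk then looks like this (a sketch
mirroring the call sites converted below, e.g. pagevec_lru_move_fn()):

	struct lruvec *lruvec = NULL;
	unsigned long flags;
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		/* no-op when the page belongs to the already-locked lruvec */
		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);

		/* ... operate on page under lruvec->lru_lock ... */
	}
	if (lruvec)
		unlock_page_lruvec_irqrestore(lruvec, flags);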

Signed-off-by: Alexander Duyck <[email protected]>
Signed-off-by: Alex Shi <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Andrey Ryabinin <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Konstantin Khlebnikov <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Tejun Heo <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
---
 include/linux/memcontrol.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/mlock.c                 |  9 +-------
 mm/swap.c                  | 33 +++++++----------------------
 mm/vmscan.c                |  8 +------
 4 files changed, 61 insertions(+), 41 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7b170e9028b5..ee6ef2d8ad52 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -488,6 +488,22 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,

struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);

+static inline bool lruvec_holds_page_lru_lock(struct page *page,
+ struct lruvec *lruvec)
+{
+ pg_data_t *pgdat = page_pgdat(page);
+ const struct mem_cgroup *memcg;
+ struct mem_cgroup_per_node *mz;
+
+ if (mem_cgroup_disabled())
+ return lruvec == &pgdat->__lruvec;
+
+ mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
+ memcg = page->mem_cgroup ? : root_mem_cgroup;
+
+ return lruvec->pgdat == pgdat && mz->memcg == memcg;
+}
+
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);

struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
@@ -1023,6 +1039,14 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
return &pgdat->__lruvec;
}

+static inline bool lruvec_holds_page_lru_lock(struct page *page,
+ struct lruvec *lruvec)
+{
+ pg_data_t *pgdat = page_pgdat(page);
+
+ return lruvec == &pgdat->__lruvec;
+}
+
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
return NULL;
@@ -1469,6 +1493,34 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
spin_unlock_irqrestore(&lruvec->lru_lock, flags);
}

+/* Don't lock again if the page's lruvec is already locked */
+static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
+ struct lruvec *locked_lruvec)
+{
+ if (locked_lruvec) {
+ if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+ return locked_lruvec;
+
+ unlock_page_lruvec_irq(locked_lruvec);
+ }
+
+ return lock_page_lruvec_irq(page);
+}
+
+/* Don't lock again if the page's lruvec is already locked */
+static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
+ struct lruvec *locked_lruvec, unsigned long *flags)
+{
+ if (locked_lruvec) {
+ if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+ return locked_lruvec;
+
+ unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
+ }
+
+ return lock_page_lruvec_irqsave(page, flags);
+}
+
#ifdef CONFIG_CGROUP_WRITEBACK

struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
diff --git a/mm/mlock.c b/mm/mlock.c
index 177d2588e863..0448409184e3 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -302,17 +302,10 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
/* Phase 1: page isolation */
for (i = 0; i < nr; i++) {
struct page *page = pvec->pages[i];
- struct lruvec *new_lruvec;

/* block memcg change in mem_cgroup_move_account */
lock_page_memcg(page);
- new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
- if (new_lruvec != lruvec) {
- if (lruvec)
- unlock_page_lruvec_irq(lruvec);
- lruvec = lock_page_lruvec_irq(page);
- }
-
+ lruvec = relock_page_lruvec_irq(page, lruvec);
if (TestClearPageMlocked(page)) {
/*
* We already have pin from follow_page_mask()
diff --git a/mm/swap.c b/mm/swap.c
index b67959b701c0..2ac78e8fab71 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -209,19 +209,12 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,

for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- struct lruvec *new_lruvec;

/* block memcg migration during page moving between lru */
if (!TestClearPageLRU(page))
continue;

- new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
- if (lruvec != new_lruvec) {
- if (lruvec)
- unlock_page_lruvec_irqrestore(lruvec, flags);
- lruvec = lock_page_lruvec_irqsave(page, &flags);
- }
-
+ lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
(*move_fn)(page, lruvec);

SetPageLRU(page);
@@ -865,17 +858,12 @@ void release_pages(struct page **pages, int nr)
}

if (PageLRU(page)) {
- struct lruvec *new_lruvec;
-
- new_lruvec = mem_cgroup_page_lruvec(page,
- page_pgdat(page));
- if (new_lruvec != lruvec) {
- if (lruvec)
- unlock_page_lruvec_irqrestore(lruvec,
- flags);
+ struct lruvec *prev_lruvec = lruvec;
+
+ lruvec = relock_page_lruvec_irqsave(page, lruvec,
+ &flags);
+ if (prev_lruvec != lruvec)
lock_batch = 0;
- lruvec = lock_page_lruvec_irqsave(page, &flags);
- }

VM_BUG_ON_PAGE(!PageLRU(page), page);
__ClearPageLRU(page);
@@ -982,15 +970,8 @@ void __pagevec_lru_add(struct pagevec *pvec)

for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
- struct lruvec *new_lruvec;
-
- new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
- if (lruvec != new_lruvec) {
- if (lruvec)
- unlock_page_lruvec_irqrestore(lruvec, flags);
- lruvec = lock_page_lruvec_irqsave(page, &flags);
- }

+ lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
__pagevec_lru_add_fn(page, lruvec);
}
if (lruvec)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 789444ae4c88..2c94790d4cb1 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4280,15 +4280,9 @@ void check_move_unevictable_pages(struct pagevec *pvec)

for (i = 0; i < pvec->nr; i++) {
struct page *page = pvec->pages[i];
- struct lruvec *new_lruvec;

pgscanned++;
- new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
- if (lruvec != new_lruvec) {
- if (lruvec)
- unlock_page_lruvec_irq(lruvec);
- lruvec = lock_page_lruvec_irq(page);
- }
+ lruvec = relock_page_lruvec_irq(page, lruvec);

if (!PageLRU(page) || !PageUnevictable(page))
continue;
--
1.8.3.1


2020-09-22 08:28:26

by Hugh Dickins

Subject: Re: [PATCH v18 21/32] mm/lru: introduce the relock_page_lruvec function

On Mon, 24 Aug 2020, Alex Shi wrote:

> From: Alexander Duyck <[email protected]>
>
> Use this new function to replace the same code repeated at several call
> sites; no functional change.
>
> When testing for relock, we can avoid the need for RCU locking if we simply
> compare the page's pgdat and memcg pointers against those that the lruvec is
> holding. By doing this we can avoid the extra pointer walks and accesses of
> the memory cgroup.
>
> In addition, we can avoid the checks entirely if lruvec is currently NULL.
>
> Signed-off-by: Alexander Duyck <[email protected]>
> Signed-off-by: Alex Shi <[email protected]>

Again, I'll wait to see __munlock_pagevec() fixed before Acking this
one, but that's the only issue. And I've suggested that you use
lruvec_holds_page_lru_lock() in mm/vmscan.c move_pages_to_lru(),
to replace the uglier and less efficient VM_BUG_ON_PAGE there.
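
Concretely, something like this is what I had in mind there - a sketch
only, assuming the page and lruvec are already in hand at that point in
move_pages_to_lru():

	/* All pages are expected to be under the lru_lock we hold */
	VM_BUG_ON_PAGE(!lruvec_holds_page_lru_lock(page, lruvec), page);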

Oh, there is one other issue: the 0day robot did report (2020-06-19)
that sparse doesn't understand relock_page_lruvec*(). I've never got
around to working out how to write what it needs - conditional
__release plus __acquire in some form, I imagine. I've never got into
sparse annotations before, so I'll give it a try, but if anyone beats
me to it, that will be welcome. There are higher priorities, though -
I do not think you should wait for the sparse warning to be fixed
before reposting.
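
For the record, the kind of thing I imagine trying first - pure
guesswork, not yet run through sparse, so a sketch rather than the fix:

	/*
	 * __acquire()/__release() generate no code; they only adjust
	 * sparse's lock-context counter.  Acknowledge the incoming lock
	 * with a no-op __acquire(), so that every path through the
	 * function exits exactly one lru_lock up (this assumes the
	 * lock/unlock helpers themselves carry matching annotations).
	 */
	static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
			struct lruvec *locked_lruvec)
	{
		if (locked_lruvec) {
			/* caller really holds this: let sparse count it here */
			__acquire(&locked_lruvec->lru_lock);
			if (lruvec_holds_page_lru_lock(page, locked_lruvec))
				return locked_lruvec;

			unlock_page_lruvec_irq(locked_lruvec);
		}

		return lock_page_lruvec_irq(page);
	}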
