From: Huang Ying <[email protected]>
total_swapcache_pages() may race with swapper_spaces[] allocation and
freeing. Previously, this is protected with a swapper_spaces[]
specific RCU mechanism. To simplify the logic/code complexity, it is
replaced with get/put_swap_device(). The code line number is reduced
too. Although not so important, the swapoff() performance improves
too because one synchronize_rcu() call during swapoff() is deleted.
Signed-off-by: "Huang, Ying" <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Jérôme Glisse <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Yang Shi <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Dave Jiang <[email protected]>
Cc: Daniel Jordan <[email protected]>
Cc: Andrea Parri <[email protected]>
---
mm/swap_state.c | 28 ++++++++++------------------
1 file changed, 10 insertions(+), 18 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f509cdaa81b1..b84c58b572ca 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -73,23 +73,19 @@ unsigned long total_swapcache_pages(void)
unsigned int i, j, nr;
unsigned long ret = 0;
struct address_space *spaces;
+ struct swap_info_struct *si;
- rcu_read_lock();
for (i = 0; i < MAX_SWAPFILES; i++) {
- /*
- * The corresponding entries in nr_swapper_spaces and
- * swapper_spaces will be reused only after at least
- * one grace period. So it is impossible for them
- * belongs to different usage.
- */
- nr = nr_swapper_spaces[i];
- spaces = rcu_dereference(swapper_spaces[i]);
- if (!nr || !spaces)
+ /* Prevent swapoff to free swapper_spaces */
+ si = get_swap_device(swp_entry(i, 1));
+ if (!si)
continue;
+ nr = nr_swapper_spaces[i];
+ spaces = swapper_spaces[i];
for (j = 0; j < nr; j++)
ret += spaces[j].nrpages;
+ put_swap_device(si);
}
- rcu_read_unlock();
return ret;
}
@@ -611,20 +607,16 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
mapping_set_no_writeback_tags(space);
}
nr_swapper_spaces[type] = nr;
- rcu_assign_pointer(swapper_spaces[type], spaces);
+ swapper_spaces[type] = spaces;
return 0;
}
void exit_swap_address_space(unsigned int type)
{
- struct address_space *spaces;
-
- spaces = swapper_spaces[type];
+ kvfree(swapper_spaces[type]);
nr_swapper_spaces[type] = 0;
- rcu_assign_pointer(swapper_spaces[type], NULL);
- synchronize_rcu();
- kvfree(spaces);
+ swapper_spaces[type] = NULL;
}
static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
--
2.20.1
On Mon, May 27, 2019 at 04:27:14PM +0800, Huang, Ying wrote:
> From: Huang Ying <[email protected]>
>
> total_swapcache_pages() may race with swapper_spaces[] allocation and
> freeing. Previously, this is protected with a swapper_spaces[]
> specific RCU mechanism. To simplify the logic/code complexity, it is
> replaced with get/put_swap_device(). The code line number is reduced
> too. Although not so important, the swapoff() performance improves
> too because one synchronize_rcu() call during swapoff() is deleted.
I am guessing that total_swapcache_pages() is not used on any
fastpaths, but must defer to others on this. Of course, if the
performance/scalability of total_swapcache_pages() is important,
benchmarking is needed.
But where do I find get_swap_device() and put_swap_device()? I do not
see them in current mainline.
Thanx, Paul
> Signed-off-by: "Huang, Ying" <[email protected]>
> Cc: Hugh Dickins <[email protected]>
> Cc: Paul E. McKenney <[email protected]>
> Cc: Minchan Kim <[email protected]>
> Cc: Johannes Weiner <[email protected]>
> Cc: Tim Chen <[email protected]>
> Cc: Mel Gorman <[email protected]>
> Cc: J?r?me Glisse <[email protected]>
> Cc: Michal Hocko <[email protected]>
> Cc: Andrea Arcangeli <[email protected]>
> Cc: Yang Shi <[email protected]>
> Cc: David Rientjes <[email protected]>
> Cc: Rik van Riel <[email protected]>
> Cc: Jan Kara <[email protected]>
> Cc: Dave Jiang <[email protected]>
> Cc: Daniel Jordan <[email protected]>
> Cc: Andrea Parri <[email protected]>
> ---
> mm/swap_state.c | 28 ++++++++++------------------
> 1 file changed, 10 insertions(+), 18 deletions(-)
>
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index f509cdaa81b1..b84c58b572ca 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -73,23 +73,19 @@ unsigned long total_swapcache_pages(void)
> unsigned int i, j, nr;
> unsigned long ret = 0;
> struct address_space *spaces;
> + struct swap_info_struct *si;
>
> - rcu_read_lock();
> for (i = 0; i < MAX_SWAPFILES; i++) {
> - /*
> - * The corresponding entries in nr_swapper_spaces and
> - * swapper_spaces will be reused only after at least
> - * one grace period. So it is impossible for them
> - * belongs to different usage.
> - */
> - nr = nr_swapper_spaces[i];
> - spaces = rcu_dereference(swapper_spaces[i]);
> - if (!nr || !spaces)
> + /* Prevent swapoff to free swapper_spaces */
> + si = get_swap_device(swp_entry(i, 1));
> + if (!si)
> continue;
> + nr = nr_swapper_spaces[i];
> + spaces = swapper_spaces[i];
> for (j = 0; j < nr; j++)
> ret += spaces[j].nrpages;
> + put_swap_device(si);
> }
> - rcu_read_unlock();
> return ret;
> }
>
> @@ -611,20 +607,16 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
> mapping_set_no_writeback_tags(space);
> }
> nr_swapper_spaces[type] = nr;
> - rcu_assign_pointer(swapper_spaces[type], spaces);
> + swapper_spaces[type] = spaces;
>
> return 0;
> }
>
> void exit_swap_address_space(unsigned int type)
> {
> - struct address_space *spaces;
> -
> - spaces = swapper_spaces[type];
> + kvfree(swapper_spaces[type]);
> nr_swapper_spaces[type] = 0;
> - rcu_assign_pointer(swapper_spaces[type], NULL);
> - synchronize_rcu();
> - kvfree(spaces);
> + swapper_spaces[type] = NULL;
> }
>
> static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
> --
> 2.20.1
>
> But where do I find get_swap_device() and put_swap_device()? I do not
> see them in current mainline.
You should see them in the -mm tree:
https://ozlabs.org/~akpm/mmots/broken-out/mm-swap-fix-race-between-swapoff-and-some-swap-operations.patch
or
http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/commit/?id=87efc56527b92a59d15c5d4e4b05f875b276a59a
Thanks,
Andrea