From: Kairui Song <[email protected]>
This series cleans up some code paths, saves a few cycles, and reduces the
object size a bit. It also fixes a rare race that loses readahead statistics.
Kairui Song (5):
swapfile: get rid of volatile and avoid redundant read
swap: avoid a redundant pte map if ra window is 1
swap: fold swap_ra_clamp_pfn into swap_ra_info
swap: remove the swap lock in swap_cache_get_folio
swap: avoid ra statistic lost when swapin races
mm/shmem.c | 8 +++++-
mm/swap_state.c | 66 +++++++++++++++++++------------------------------
mm/swapfile.c | 7 +++---
3 files changed, 36 insertions(+), 45 deletions(-)
--
2.35.2
From: Kairui Song <[email protected]>
Convert a volatile variable to the more readable READ_ONCE(). This
also keeps the code from redundantly reading the variable twice
when it races.
Signed-off-by: Kairui Song <[email protected]>
---
mm/swapfile.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 72e481aacd5d..ff4f3cb85232 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1836,13 +1836,13 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
pte_t *pte;
struct swap_info_struct *si;
int ret = 0;
- volatile unsigned char *swap_map;
si = swap_info[type];
pte = pte_offset_map(pmd, addr);
do {
struct folio *folio;
unsigned long offset;
+ unsigned char swp_count;
if (!is_swap_pte(*pte))
continue;
@@ -1853,7 +1853,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
offset = swp_offset(entry);
pte_unmap(pte);
- swap_map = &si->swap_map[offset];
folio = swap_cache_get_folio(entry, vma, addr);
if (!folio) {
struct page *page;
@@ -1870,8 +1869,10 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
folio = page_folio(page);
}
if (!folio) {
- if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD)
+ swp_count = READ_ONCE(si->swap_map[offset]);
+ if (swp_count == 0 || swp_count == SWAP_MAP_BAD)
goto try_next;
+
return -ENOMEM;
}
--
2.35.2
From: Kairui Song <[email protected]>
This makes the code cleaner. The helper consists of only two lines of
self-explanatory code and is not reused anywhere else.
It also makes the compiled object slightly smaller:
text data bss dec hex filename
Before: 9502 976 12 10490 28fa mm/swap_state.o
After: 9470 976 12 10458 28da mm/swap_state.o
Signed-off-by: Kairui Song <[email protected]>
---
mm/swap_state.c | 44 +++++++++++++++++++-------------------------
1 file changed, 19 insertions(+), 25 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 60136bda78e3..19089417abd1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -696,28 +696,15 @@ void exit_swap_address_space(unsigned int type)
swapper_spaces[type] = NULL;
}
-static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
- unsigned long faddr,
- unsigned long lpfn,
- unsigned long rpfn,
- unsigned long *start,
- unsigned long *end)
-{
- *start = max3(lpfn, PFN_DOWN(vma->vm_start),
- PFN_DOWN(faddr & PMD_MASK));
- *end = min3(rpfn, PFN_DOWN(vma->vm_end),
- PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
-}
-
static void swap_ra_info(struct vm_fault *vmf,
- struct vma_swap_readahead *ra_info)
+ struct vma_swap_readahead *ra_info)
{
struct vm_area_struct *vma = vmf->vma;
unsigned long ra_val;
- unsigned long faddr, pfn, fpfn;
+ unsigned long faddr, pfn, fpfn, lpfn, rpfn;
unsigned long start, end;
pte_t *pte, *orig_pte;
- unsigned int max_win, hits, prev_win, win, left;
+ unsigned int max_win, hits, prev_win, win;
#ifndef CONFIG_64BIT
pte_t *tpte;
#endif
@@ -745,16 +732,23 @@ static void swap_ra_info(struct vm_fault *vmf,
/* Copy the PTEs because the page table may be unmapped */
orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
- if (fpfn == pfn + 1)
- swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
- else if (pfn == fpfn + 1)
- swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
- &start, &end);
- else {
- left = (win - 1) / 2;
- swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
- &start, &end);
+ if (fpfn == pfn + 1) {
+ lpfn = fpfn;
+ rpfn = fpfn + win;
+ } else if (pfn == fpfn + 1) {
+ lpfn = fpfn - win + 1;
+ rpfn = fpfn + 1;
+ } else {
+ unsigned int left = (win - 1) / 2;
+
+ lpfn = fpfn - left;
+ rpfn = fpfn + win - left;
}
+ start = max3(lpfn, PFN_DOWN(vma->vm_start),
+ PFN_DOWN(faddr & PMD_MASK));
+ end = min3(rpfn, PFN_DOWN(vma->vm_end),
+ PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
+
ra_info->nr_pte = end - start;
ra_info->offset = fpfn - start;
pte -= ra_info->offset;
--
2.35.2
On Fri, Dec 09, 2022 at 02:02:07AM +0800, Kairui Song wrote:
> From: Kairui Song <[email protected]>
>
> This make the code cleaner. This helper is made of only two line of
> self explanational code and not reused anywhere else.
>
> And this actually make the compiled object smaller by a bit:
>
> text data bss dec hex filename
> Before: 9502 976 12 10490 28fa mm/swap_state.o
> After: 9470 976 12 10458 28da mm/swap_state.o
FYI, you can use scripts/bloat-o-meter to get a slightly more
useful analysis of object code changes.
Matthew Wilcox <[email protected]> 于2022年12月9日周五 03:09写道:
>
> On Fri, Dec 09, 2022 at 02:02:07AM +0800, Kairui Song wrote:
> > From: Kairui Song <[email protected]>
> >
> > This make the code cleaner. This helper is made of only two line of
> > self explanational code and not reused anywhere else.
> >
> > And this actually make the compiled object smaller by a bit:
> >
> > text data bss dec hex filename
> > Before: 9502 976 12 10490 28fa mm/swap_state.o
> > After: 9470 976 12 10458 28da mm/swap_state.o
>
> FYI, you can use scripts/bloat-o-meter to get a slightly more
> useful analysis of object code changes.
>
Thanks! That's very helpful info. I got the following output from bloat-o-meter:
./scripts/bloat-o-meter mm/swap_state.o.old mm/swap_state.o
add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-35 (-35)
Function old new delta
swap_ra_info.constprop 512 477 -35
Total: Before=8388, After=8353, chg -0.42%
I'll include this info in the commit message from now on.
Kairui Song <[email protected]> writes:
> From: Kairui Song <[email protected]>
>
> Convert a volatile variable to more readable READ_ONCE. And this
> actually avoids the code from reading the variable twice redundantly
> when it races.
>
> Signed-off-by: Kairui Song <[email protected]>
LGTM, Thanks!
Reviewed-by: "Huang, Ying" <[email protected]>
> ---
> mm/swapfile.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 72e481aacd5d..ff4f3cb85232 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1836,13 +1836,13 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
> pte_t *pte;
> struct swap_info_struct *si;
> int ret = 0;
> - volatile unsigned char *swap_map;
>
> si = swap_info[type];
> pte = pte_offset_map(pmd, addr);
> do {
> struct folio *folio;
> unsigned long offset;
> + unsigned char swp_count;
>
> if (!is_swap_pte(*pte))
> continue;
> @@ -1853,7 +1853,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>
> offset = swp_offset(entry);
> pte_unmap(pte);
> - swap_map = &si->swap_map[offset];
> folio = swap_cache_get_folio(entry, vma, addr);
> if (!folio) {
> struct page *page;
> @@ -1870,8 +1869,10 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
> folio = page_folio(page);
> }
> if (!folio) {
> - if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD)
> + swp_count = READ_ONCE(si->swap_map[offset]);
> + if (swp_count == 0 || swp_count == SWAP_MAP_BAD)
> goto try_next;
> +
> return -ENOMEM;
> }
Kairui Song <[email protected]> writes:
> From: Kairui Song <[email protected]>
>
> This make the code cleaner. This helper is made of only two line of
> self explanational code and not reused anywhere else.
>
> And this actually make the compiled object smaller by a bit:
>
> text data bss dec hex filename
> Before: 9502 976 12 10490 28fa mm/swap_state.o
> After: 9470 976 12 10458 28da mm/swap_state.o
>
> Signed-off-by: Kairui Song <[email protected]>
> ---
> mm/swap_state.c | 44 +++++++++++++++++++-------------------------
> 1 file changed, 19 insertions(+), 25 deletions(-)
LGTM, Thanks!
Reviewed-by: "Huang, Ying" <[email protected]>
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 60136bda78e3..19089417abd1 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -696,28 +696,15 @@ void exit_swap_address_space(unsigned int type)
> swapper_spaces[type] = NULL;
> }
>
> -static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
> - unsigned long faddr,
> - unsigned long lpfn,
> - unsigned long rpfn,
> - unsigned long *start,
> - unsigned long *end)
> -{
> - *start = max3(lpfn, PFN_DOWN(vma->vm_start),
> - PFN_DOWN(faddr & PMD_MASK));
> - *end = min3(rpfn, PFN_DOWN(vma->vm_end),
> - PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
> -}
> -
> static void swap_ra_info(struct vm_fault *vmf,
> - struct vma_swap_readahead *ra_info)
> + struct vma_swap_readahead *ra_info)
> {
> struct vm_area_struct *vma = vmf->vma;
> unsigned long ra_val;
> - unsigned long faddr, pfn, fpfn;
> + unsigned long faddr, pfn, fpfn, lpfn, rpfn;
> unsigned long start, end;
> pte_t *pte, *orig_pte;
> - unsigned int max_win, hits, prev_win, win, left;
> + unsigned int max_win, hits, prev_win, win;
> #ifndef CONFIG_64BIT
> pte_t *tpte;
> #endif
> @@ -745,16 +732,23 @@ static void swap_ra_info(struct vm_fault *vmf,
>
> /* Copy the PTEs because the page table may be unmapped */
> orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
> - if (fpfn == pfn + 1)
> - swap_ra_clamp_pfn(vma, faddr, fpfn, fpfn + win, &start, &end);
> - else if (pfn == fpfn + 1)
> - swap_ra_clamp_pfn(vma, faddr, fpfn - win + 1, fpfn + 1,
> - &start, &end);
> - else {
> - left = (win - 1) / 2;
> - swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
> - &start, &end);
> + if (fpfn == pfn + 1) {
> + lpfn = fpfn;
> + rpfn = fpfn + win;
> + } else if (pfn == fpfn + 1) {
> + lpfn = fpfn - win + 1;
> + rpfn = fpfn + 1;
> + } else {
> + unsigned int left = (win - 1) / 2;
> +
> + lpfn = fpfn - left;
> + rpfn = fpfn + win - left;
> }
> + start = max3(lpfn, PFN_DOWN(vma->vm_start),
> + PFN_DOWN(faddr & PMD_MASK));
> + end = min3(rpfn, PFN_DOWN(vma->vm_end),
> + PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
> +
> ra_info->nr_pte = end - start;
> ra_info->offset = fpfn - start;
> pte -= ra_info->offset;