2022-12-22 06:17:29

by Yuanchu Xie

[permalink] [raw]
Subject: [PATCH 1/2] mm: add vma_has_locality()

From: Yu Zhao <[email protected]>

Currently in vm_flags in vm_area_struct, both VM_SEQ_READ and
VM_RAND_READ indicate a lack of locality in accesses to the vma. Some
places that check for locality are missing one of them. We add
vma_has_locality to replace the existing locality checks for clarity.

Signed-off-by: Yu Zhao <[email protected]>
Signed-off-by: Yuanchu Xie <[email protected]>
---
include/linux/mm_inline.h | 8 ++++++++
mm/memory.c | 7 +++----
mm/rmap.c | 42 +++++++++++++++++----------------------
mm/vmscan.c | 5 ++++-
4 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index e8ed225d8f7c..80c0f6901ead 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -578,4 +578,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
#endif
}

+static inline bool vma_has_locality(struct vm_area_struct *vma)
+{
+ if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
+ return false;
+
+ return true;
+}
+
#endif
diff --git a/mm/memory.c b/mm/memory.c
index 4000e9f017e0..a3f60e53f348 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1402,8 +1402,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
force_flush = 1;
}
}
- if (pte_young(ptent) &&
- likely(!(vma->vm_flags & VM_SEQ_READ)))
+ if (pte_young(ptent) && likely(vma_has_locality(vma)))
mark_page_accessed(page);
}
rss[mm_counter(page)]--;
@@ -5148,8 +5147,8 @@ static inline void mm_account_fault(struct pt_regs *regs,
#ifdef CONFIG_LRU_GEN
static void lru_gen_enter_fault(struct vm_area_struct *vma)
{
- /* the LRU algorithm doesn't apply to sequential or random reads */
- current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+ /* the LRU algorithm only applies to accesses with locality */
+ current->in_lru_fault = vma_has_locality(vma);
}

static void lru_gen_exit_fault(void)
diff --git a/mm/rmap.c b/mm/rmap.c
index 32e48b1c5847..a2e83fea6fed 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -823,25 +823,14 @@ static bool folio_referenced_one(struct folio *folio,
}

if (pvmw.pte) {
- if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
- !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+ if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
lru_gen_look_around(&pvmw);
referenced++;
}

if (ptep_clear_flush_young_notify(vma, address,
- pvmw.pte)) {
- /*
- * Don't treat a reference through
- * a sequentially read mapping as such.
- * If the folio has been used in another mapping,
- * we will catch it; if this other mapping is
- * already gone, the unmap path will have set
- * the referenced flag or activated the folio.
- */
- if (likely(!(vma->vm_flags & VM_SEQ_READ)))
- referenced++;
- }
+ pvmw.pte))
+ referenced++;
} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
if (pmdp_clear_flush_young_notify(vma, address,
pvmw.pmd))
@@ -875,7 +864,20 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
struct folio_referenced_arg *pra = arg;
struct mem_cgroup *memcg = pra->memcg;

- if (!mm_match_cgroup(vma->vm_mm, memcg))
+ /*
+ * Ignore references from this mapping if it has no locality. If the
+ * folio has been used in another mapping, we will catch it; if this
+ * other mapping is already gone, the unmap path will have set the
+ * referenced flag or activated the folio in zap_pte_range().
+ */
+ if (!vma_has_locality(vma))
+ return true;
+
+ /*
+ * If we are reclaiming on behalf of a cgroup, skip counting on behalf
+ * of references from different cgroups
+ */
+ if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
return true;

return false;
@@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio, int is_locked,
.arg = (void *)&pra,
.anon_lock = folio_lock_anon_vma_read,
.try_lock = true,
+ .invalid_vma = invalid_folio_referenced_vma,
};

*vm_flags = 0;
@@ -921,15 +924,6 @@ int folio_referenced(struct folio *folio, int is_locked,
return 1;
}

- /*
- * If we are reclaiming on behalf of a cgroup, skip
- * counting on behalf of references from different
- * cgroups
- */
- if (memcg) {
- rwc.invalid_vma = invalid_folio_referenced_vma;
- }
-
rmap_walk(folio, &rwc);
*vm_flags = pra.vm_flags;

diff --git a/mm/vmscan.c b/mm/vmscan.c
index e83d2a74e942..5cf39f314876 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3782,7 +3782,10 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
if (is_vm_hugetlb_page(vma))
return true;

- if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
+ if (!vma_has_locality(vma))
+ return true;
+
+ if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))
return true;

if (vma == get_gate_vma(vma->vm_mm))
--
2.39.0.314.g84b9a713c41-goog


2022-12-22 06:38:42

by Yuanchu Xie

[permalink] [raw]
Subject: [PATCH 2/2] mm: support POSIX_FADV_NOREUSE for generic fadvise handler

From: Yu Zhao <[email protected]>

POSIX_FADV_NOREUSE allows an application to specify that accesses to
file data do not follow LRU and that the data is used only once. Since
2.6.18 this has been a no-op. We add FMODE_NOREUSE, checked in
vma_has_locality, to prevent LRU activation.

Signed-off-by: Yu Zhao <[email protected]>
Signed-off-by: Yuanchu Xie <[email protected]>
---
include/linux/fs.h | 2 ++
include/linux/mm_inline.h | 3 +++
mm/fadvise.c | 5 ++++-
3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 066555ad1bf8..5660ed0edf1a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File supports DIRECT IO */
#define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000)

+#define FMODE_NOREUSE ((__force fmode_t)0x800000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x4000000)

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 80c0f6901ead..024f834d952d 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -583,6 +583,9 @@ static inline bool vma_has_locality(struct vm_area_struct *vma)
if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
return false;

+ if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE))
+ return false;
+
return true;
}

diff --git a/mm/fadvise.c b/mm/fadvise.c
index bf04fec87f35..fb7c5f43fd2a 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -80,7 +80,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
case POSIX_FADV_NORMAL:
file->f_ra.ra_pages = bdi->ra_pages;
spin_lock(&file->f_lock);
- file->f_mode &= ~FMODE_RANDOM;
+ file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE);
spin_unlock(&file->f_lock);
break;
case POSIX_FADV_RANDOM:
@@ -107,6 +107,9 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
force_page_cache_readahead(mapping, file, start_index, nrpages);
break;
case POSIX_FADV_NOREUSE:
+ spin_lock(&file->f_lock);
+ file->f_mode |= FMODE_NOREUSE;
+ spin_unlock(&file->f_lock);
break;
case POSIX_FADV_DONTNEED:
__filemap_fdatawrite_range(mapping, offset, endbyte,
--
2.39.0.314.g84b9a713c41-goog

2022-12-22 07:22:01

by Yuanchu Xie

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: add vma_has_locality()

I forgot to add the Ack from Johannes earlier[1]

[1] https://lore.kernel.org/all/[email protected]/


Acked-by: Johannes Weiner <[email protected]>

2022-12-22 19:02:01

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: add vma_has_locality()

On Wed, 21 Dec 2022 22:13:40 -0800 Yuanchu Xie <[email protected]> wrote:

> From: Yu Zhao <[email protected]>
>
> Currently in vm_flags in vm_area_struct, both VM_SEQ_READ and
> VM_RAND_READ indicate a lack of locality in accesses to the vma. Some
> places that check for locality are missing one of them. We add
> vma_has_locality to replace the existing locality checks for clarity.

I'm all confused. Surely VM_SEQ_READ implies locality and VM_RAND_READ
indicates no-locality?

2022-12-22 20:18:18

by Yu Zhao

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: add vma_has_locality()

On Thu, Dec 22, 2022 at 11:49 AM Andrew Morton
<[email protected]> wrote:
>
> On Wed, 21 Dec 2022 22:13:40 -0800 Yuanchu Xie <[email protected]> wrote:
>
> > From: Yu Zhao <[email protected]>

This works; suggested-by probably works even better, since I didn't do
the follow-up work.

> > Currently in vm_flags in vm_area_struct, both VM_SEQ_READ and
> > VM_RAND_READ indicate a lack of locality in accesses to the vma. Some
> > places that check for locality are missing one of them. We add
> > vma_has_locality to replace the existing locality checks for clarity.
>
> I'm all confused. Surely VM_SEQ_READ implies locality and VM_RAND_READ
> indicates no-locality?

Spatially, yes. But we focus more on the temporal criteria here, i.e.,
the reuse of an area within a relatively small duration. Both the
active/inactive LRU and MGLRU rely on this.

VM_SEQ_READ, while being a special case of spatial locality, fails the
temporal criteria. VM_RAND_READ fails both criteria, obviously.

Once an area passes the temporal criteria, MGLRU additionally exploits
spatial locality by lru_gen_look_around(), which is also touched in
this patch. This part is good to know but not really relevant here.

2022-12-22 20:26:34

by Yu Zhao

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: add vma_has_locality()

On Wed, Dec 21, 2022 at 11:13 PM Yuanchu Xie <[email protected]> wrote:
>
> From: Yu Zhao <[email protected]>
>
> Currently in vm_flags in vm_area_struct, both VM_SEQ_READ and
> VM_RAND_READ indicate a lack of locality in accesses to the vma. Some
> places that check for locality are missing one of them. We add
> vma_has_locality to replace the existing locality checks for clarity.

Need benchmark results. A simple fio test will do; doesn't need to be
the curl one.

> + /*
> + * If we are reclaiming on behalf of a cgroup, skip counting on behalf
> + * of references from different cgroups

Nit: add a period at the end.

> @@ -906,6 +908,7 @@ int folio_referenced(struct folio *folio, int is_locked,
> .arg = (void *)&pra,
> .anon_lock = folio_lock_anon_vma_read,
> .try_lock = true,
> + .invalid_vma = invalid_folio_referenced_vma,

Nice. (What I suggested isn't as clean:
https://lore.kernel.org/all/Y31s%[email protected]/)

2022-12-22 20:50:27

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: add vma_has_locality()

On Thu, 22 Dec 2022 12:44:35 -0700 Yu Zhao <[email protected]> wrote:

> On Thu, Dec 22, 2022 at 11:49 AM Andrew Morton
> <[email protected]> wrote:
> >
> > On Wed, 21 Dec 2022 22:13:40 -0800 Yuanchu Xie <[email protected]> wrote:
> >
> > > From: Yu Zhao <[email protected]>
>
> This works; suggested-by probably works even better, since I didn't do
> the follow-up work.
>
> > > Currently in vm_flags in vm_area_struct, both VM_SEQ_READ and
> > > VM_RAND_READ indicate a lack of locality in accesses to the vma. Some
> > > places that check for locality are missing one of them. We add
> > > vma_has_locality to replace the existing locality checks for clarity.
> >
> > I'm all confused. Surely VM_SEQ_READ implies locality and VM_RAND_READ
> > indicates no-locality?
>
> Spatially, yes. But we focus more on the temporal criteria here, i.e.,
> the reuse of an area within a relatively small duration. Both the
> active/inactive LRU and MGLRU rely on this.

Oh. Why didn't it say that ;)

How about s/locality/recency/g?


2022-12-22 22:33:30

by Yu Zhao

[permalink] [raw]
Subject: Re: [PATCH 2/2] mm: support POSIX_FADV_NOREUSE for generic fadvise handler

On Wed, Dec 21, 2022 at 11:13 PM Yuanchu Xie <[email protected]> wrote:

Thanks for following up on this.

> POSIX_FADV_NOREUSE allows an application to specify that accesses to
> file data do not follow LRU and that the data is used only once. Since
> 2.6.18 this has been a no-op. We add FMODE_NOREUSE, checked in
> vma_has_locality, to prevent LRU activation.

This needs to include what you plan to write on the man page.

A few questions to answer:
1. Does this flag work with accesses via FDs?
2. If there is a random or sequential file VMA, should the user choose
this flag or the VMA flag or both? Consider a) how those flags affect
readahead; b) their scopes, i.e., per VMA or per file.

Please also follow up with Jens to add this flag to fio.

Michael reported that SVT-AV1 regressed with MGLRU, which is the only
real regression [1]. The following not only fixes the regression but also
improves the baseline. Please follow up on that as well.

--- a/Source/App/EncApp/EbAppMain.c
+++ b/Source/App/EncApp/EbAppMain.c
@@ -115,6 +115,7 @@ void init_memory_file_map(EbConfig* config) {
fseeko(config->input_file, curr_loc, SEEK_SET); // seek back to that location
#ifndef _WIN32
config->mmap.fd = fileno(config->input_file);
+ posix_fadvise(config->mmap.fd, 0, 0, POSIX_FADV_NOREUSE);
#endif
}
config->mmap.file_frame_it = 0;

[1] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57

2022-12-30 22:40:34

by Yu Zhao

[permalink] [raw]
Subject: Re: [PATCH 1/2] mm: add vma_has_locality()

On Thu, Dec 22, 2022 at 1:29 PM Andrew Morton <[email protected]> wrote:
>
> On Thu, 22 Dec 2022 12:44:35 -0700 Yu Zhao <[email protected]> wrote:
>
> > On Thu, Dec 22, 2022 at 11:49 AM Andrew Morton
> > <[email protected]> wrote:
> > >
> > > On Wed, 21 Dec 2022 22:13:40 -0800 Yuanchu Xie <[email protected]> wrote:
> > >
> > > > From: Yu Zhao <[email protected]>
> >
> > This works; suggested-by probably works even better, since I didn't do
> > the follow-up work.
> >
> > > > Currently in vm_flags in vm_area_struct, both VM_SEQ_READ and
> > > > VM_RAND_READ indicate a lack of locality in accesses to the vma. Some
> > > > places that check for locality are missing one of them. We add
> > > > vma_has_locality to replace the existing locality checks for clarity.
> > >
> > > I'm all confused. Surely VM_SEQ_READ implies locality and VM_RAND_READ
> > > indicates no-locality?
> >
> > Spatially, yes. But we focus more on the temporal criteria here, i.e.,
> > the reuse of an area within a relatively small duration. Both the
> > active/inactive LRU and MGLRU rely on this.
>
> Oh. Why didn't it say that ;)
>
> How about s/locality/recency/g?

Thanks. I've done this, and posted the v2 which includes much better
commit messages.