The commit 7635d9cbe832 ("mm, thp, proc: report THP eligibility for each
vma") introduced THPeligible bit for processes' smaps. But, when checking
the eligibility for shmem vma, __transparent_hugepage_enabled() is
called to override the result from shmem_huge_enabled(). It may result
in the anonymous vma's THP flag override shmem's. For example, running a
simple test which create THP for shmem, but with anonymous THP disabled,
when reading the process's smaps, it may show:
7fc92ec00000-7fc92f000000 rw-s 00000000 00:14 27764 /dev/shm/test
Size: 4096 kB
...
[snip]
...
ShmemPmdMapped: 4096 kB
...
[snip]
...
THPeligible: 0
And, /proc/meminfo does show THP allocated and PMD mapped too:
ShmemHugePages: 4096 kB
ShmemPmdMapped: 4096 kB
This doesn't make too much sense. The shmem objects should be treated
separately from anonymous THP. Calling shmem_huge_enabled() with checking
MMF_DISABLE_THP sounds good enough. And, we could skip stack and
dax vma check since we already checked if the vma is shmem already.
The transhuge_vma_suitable() is needed to check vma, but it was only
available for shmem THP. The patch 1/2 makes it available for all kind of
THPs and does some code duplication cleanup, so it is made a separate patch.
Changelog:
v4: * Moved transhuge_vma_suitable() to include/linux/huge_mm.h and
regroup some functions in linux/include/mm.h. Per Hugh Dickins.
* Added Hugh’s Acked-by to patch 2/2.
v3: * Check if vma is suitable for allocating THP per Hugh Dickins.
* Fixed smaps output alignment and documentation per Hugh Dickins.
v2: * Check VM_NOHUGEPAGE per Michal Hocko.
Yang Shi (2):
mm: thp: make transhuge_vma_suitable available for anonymous THP
mm: thp: fix false negative of shmem vma's THP eligibility
Documentation/filesystems/proc.txt | 4 ++--
fs/proc/task_mmu.c | 3 ++-
include/linux/huge_mm.h | 23 +++++++++++++++++++++++
include/linux/mm.h | 34 +++++++++++++++++-----------------
mm/huge_memory.c | 11 ++++++++---
mm/memory.c | 13 -------------
mm/shmem.c | 3 +++
7 files changed, 55 insertions(+), 36 deletions(-)
The transhuge_vma_suitable() was only available for shmem THP, but
anonymous THP has the same check except pgoff check. And, it will be
used for THP eligible check in the later patch, so make it available for
all kind of THPs. This also helps reduce code duplication slightly.
Since anonymous THP doesn't have to check pgoff, so make pgoff check
shmem vma only.
And regroup some functions in include/linux/mm.h to solve compile issue since
transhuge_vma_suitable() needs call vma_is_anonymous() which was defined
after huge_mm.h is included.
Cc: Hugh Dickins <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Yang Shi <[email protected]>
---
include/linux/huge_mm.h | 23 +++++++++++++++++++++++
include/linux/mm.h | 34 +++++++++++++++++-----------------
mm/huge_memory.c | 2 +-
mm/memory.c | 13 -------------
4 files changed, 41 insertions(+), 31 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7cd5c15..45ede62 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -121,6 +121,23 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
bool transparent_hugepage_enabled(struct vm_area_struct *vma);
+#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
+
+static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
+ unsigned long haddr)
+{
+ /* Don't have to check pgoff for anonymous vma */
+ if (!vma_is_anonymous(vma)) {
+ if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) !=
+ (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK))
+ return false;
+ }
+
+ if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+ return false;
+ return true;
+}
+
#define transparent_hugepage_use_zero_page() \
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
@@ -271,6 +288,12 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
return false;
}
+static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
+ unsigned long haddr)
+{
+ return false;
+}
+
static inline void prep_transhuge_page(struct page *page) {}
#define transparent_hugepage_flags 0UL
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0389c34..beae0ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -541,6 +541,23 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
vma->vm_ops = NULL;
}
+static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+{
+ return !vma->vm_ops;
+}
+
+#ifdef CONFIG_SHMEM
+/*
+ * The vma_is_shmem is not inline because it is used only by slow
+ * paths in userfault.
+ */
+bool vma_is_shmem(struct vm_area_struct *vma);
+#else
+static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+#endif
+
+int vma_is_stack_for_current(struct vm_area_struct *vma);
+
/* flush_tlb_range() takes a vma, not a mm, and can care about flags */
#define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }
@@ -1629,23 +1646,6 @@ static inline void cancel_dirty_page(struct page *page)
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
-static inline bool vma_is_anonymous(struct vm_area_struct *vma)
-{
- return !vma->vm_ops;
-}
-
-#ifdef CONFIG_SHMEM
-/*
- * The vma_is_shmem is not inline because it is used only by slow
- * paths in userfault.
- */
-bool vma_is_shmem(struct vm_area_struct *vma);
-#else
-static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
-#endif
-
-int vma_is_stack_for_current(struct vm_area_struct *vma);
-
extern unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 885642c..782dd14 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -689,7 +689,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
struct page *page;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
- if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+ if (!transhuge_vma_suitable(vma, haddr))
return VM_FAULT_FALLBACK;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
diff --git a/mm/memory.c b/mm/memory.c
index 89325f9..e2bb51b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3162,19 +3162,6 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
}
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
-
-#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
-static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
- unsigned long haddr)
-{
- if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) !=
- (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK))
- return false;
- if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
- return false;
- return true;
-}
-
static void deposit_prealloc_pte(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
--
1.8.3.1
The commit 7635d9cbe832 ("mm, thp, proc: report THP eligibility for each
vma") introduced THPeligible bit for processes' smaps. But, when checking
the eligibility for shmem vma, __transparent_hugepage_enabled() is
called to override the result from shmem_huge_enabled(). It may result
in the anonymous vma's THP flag override shmem's. For example, running a
simple test which create THP for shmem, but with anonymous THP disabled,
when reading the process's smaps, it may show:
7fc92ec00000-7fc92f000000 rw-s 00000000 00:14 27764 /dev/shm/test
Size: 4096 kB
...
[snip]
...
ShmemPmdMapped: 4096 kB
...
[snip]
...
THPeligible: 0
And, /proc/meminfo does show THP allocated and PMD mapped too:
ShmemHugePages: 4096 kB
ShmemPmdMapped: 4096 kB
This doesn't make too much sense. The shmem objects should be treated
separately from anonymous THP. Calling shmem_huge_enabled() with checking
MMF_DISABLE_THP sounds good enough. And, we could skip stack and
dax vma check since we already checked if the vma is shmem already.
Also check if vma is suitable for THP by calling
transhuge_vma_suitable().
And minor fix to smaps output format and documentation.
Fixes: 7635d9cbe832 ("mm, thp, proc: report THP eligibility for each vma")
Acked-by: Hugh Dickins <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Yang Shi <[email protected]>
---
Documentation/filesystems/proc.txt | 4 ++--
fs/proc/task_mmu.c | 3 ++-
mm/huge_memory.c | 9 +++++++--
mm/shmem.c | 3 +++
4 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index fb4735f..99ca040 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -486,8 +486,8 @@ replaced by copy-on-write) part of the underlying shmem object out on swap.
"SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this
does not take into account swapped out page of underlying shmem objects.
"Locked" indicates whether the mapping is locked in memory or not.
-"THPeligible" indicates whether the mapping is eligible for THP pages - 1 if
-true, 0 otherwise.
+"THPeligible" indicates whether the mapping is eligible for allocating THP
+pages - 1 if true, 0 otherwise. It just shows the current status.
"VmFlags" field deserves a separate description. This member represents the kernel
flags associated with the particular virtual memory area in two letter encoded
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 818cedb..731642e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -832,7 +832,8 @@ static int show_smap(struct seq_file *m, void *v)
__show_smap(m, &mss, false);
- seq_printf(m, "THPeligible: %d\n", transparent_hugepage_enabled(vma));
+ seq_printf(m, "THPeligible: %d\n",
+ transparent_hugepage_enabled(vma));
if (arch_pkeys_enabled())
seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 782dd14..1334ede 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -63,10 +63,15 @@
bool transparent_hugepage_enabled(struct vm_area_struct *vma)
{
+ /* The addr is used to check if the vma size fits */
+ unsigned long addr = (vma->vm_end & HPAGE_PMD_MASK) - HPAGE_PMD_SIZE;
+
+ if (!transhuge_vma_suitable(vma, addr))
+ return false;
if (vma_is_anonymous(vma))
return __transparent_hugepage_enabled(vma);
- if (vma_is_shmem(vma) && shmem_huge_enabled(vma))
- return __transparent_hugepage_enabled(vma);
+ if (vma_is_shmem(vma))
+ return shmem_huge_enabled(vma);
return false;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index f4dce9c..64e5d59 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3872,6 +3872,9 @@ bool shmem_huge_enabled(struct vm_area_struct *vma)
loff_t i_size;
pgoff_t off;
+ if ((vma->vm_flags & VM_NOHUGEPAGE) ||
+ test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+ return false;
if (shmem_huge == SHMEM_HUGE_FORCE)
return true;
if (shmem_huge == SHMEM_HUGE_DENY)
--
1.8.3.1
On Thu, 18 Jul 2019, Yang Shi wrote:
> The transhuge_vma_suitable() was only available for shmem THP, but
> anonymous THP has the same check except pgoff check. And, it will be
> used for THP eligible check in the later patch, so make it available for
> all kind of THPs. This also helps reduce code duplication slightly.
>
> Since anonymous THP doesn't have to check pgoff, so make pgoff check
> shmem vma only.
>
> And regroup some functions in include/linux/mm.h to solve compile issue since
> transhuge_vma_suitable() needs call vma_is_anonymous() which was defined
> after huge_mm.h is included.
>
> Cc: Hugh Dickins <[email protected]>
Thanks!
Acked-by: Hugh Dickins <[email protected]>
> Cc: Kirill A. Shutemov <[email protected]>
> Cc: Michal Hocko <[email protected]>
> Cc: Vlastimil Babka <[email protected]>
> Cc: David Rientjes <[email protected]>
> Signed-off-by: Yang Shi <[email protected]>
> ---
> include/linux/huge_mm.h | 23 +++++++++++++++++++++++
> include/linux/mm.h | 34 +++++++++++++++++-----------------
> mm/huge_memory.c | 2 +-
> mm/memory.c | 13 -------------
> 4 files changed, 41 insertions(+), 31 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 7cd5c15..45ede62 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -121,6 +121,23 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
>
> bool transparent_hugepage_enabled(struct vm_area_struct *vma);
>
> +#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
> +
> +static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> + unsigned long haddr)
> +{
> + /* Don't have to check pgoff for anonymous vma */
> + if (!vma_is_anonymous(vma)) {
> + if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) !=
> + (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK))
> + return false;
> + }
> +
> + if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
> + return false;
> + return true;
> +}
> +
> #define transparent_hugepage_use_zero_page() \
> (transparent_hugepage_flags & \
> (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
> @@ -271,6 +288,12 @@ static inline bool transparent_hugepage_enabled(struct vm_area_struct *vma)
> return false;
> }
>
> +static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> + unsigned long haddr)
> +{
> + return false;
> +}
> +
> static inline void prep_transhuge_page(struct page *page) {}
>
> #define transparent_hugepage_flags 0UL
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0389c34..beae0ae 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -541,6 +541,23 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
> vma->vm_ops = NULL;
> }
>
> +static inline bool vma_is_anonymous(struct vm_area_struct *vma)
> +{
> + return !vma->vm_ops;
> +}
> +
> +#ifdef CONFIG_SHMEM
> +/*
> + * The vma_is_shmem is not inline because it is used only by slow
> + * paths in userfault.
> + */
> +bool vma_is_shmem(struct vm_area_struct *vma);
> +#else
> +static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
> +#endif
> +
> +int vma_is_stack_for_current(struct vm_area_struct *vma);
> +
> /* flush_tlb_range() takes a vma, not a mm, and can care about flags */
> #define TLB_FLUSH_VMA(mm,flags) { .vm_mm = (mm), .vm_flags = (flags) }
>
> @@ -1629,23 +1646,6 @@ static inline void cancel_dirty_page(struct page *page)
>
> int get_cmdline(struct task_struct *task, char *buffer, int buflen);
>
> -static inline bool vma_is_anonymous(struct vm_area_struct *vma)
> -{
> - return !vma->vm_ops;
> -}
> -
> -#ifdef CONFIG_SHMEM
> -/*
> - * The vma_is_shmem is not inline because it is used only by slow
> - * paths in userfault.
> - */
> -bool vma_is_shmem(struct vm_area_struct *vma);
> -#else
> -static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
> -#endif
> -
> -int vma_is_stack_for_current(struct vm_area_struct *vma);
> -
> extern unsigned long move_page_tables(struct vm_area_struct *vma,
> unsigned long old_addr, struct vm_area_struct *new_vma,
> unsigned long new_addr, unsigned long len,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 885642c..782dd14 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -689,7 +689,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
> struct page *page;
> unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
>
> - if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
> + if (!transhuge_vma_suitable(vma, haddr))
> return VM_FAULT_FALLBACK;
> if (unlikely(anon_vma_prepare(vma)))
> return VM_FAULT_OOM;
> diff --git a/mm/memory.c b/mm/memory.c
> index 89325f9..e2bb51b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3162,19 +3162,6 @@ static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf)
> }
>
> #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
> -
> -#define HPAGE_CACHE_INDEX_MASK (HPAGE_PMD_NR - 1)
> -static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> - unsigned long haddr)
> -{
> - if (((vma->vm_start >> PAGE_SHIFT) & HPAGE_CACHE_INDEX_MASK) !=
> - (vma->vm_pgoff & HPAGE_CACHE_INDEX_MASK))
> - return false;
> - if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
> - return false;
> - return true;
> -}
> -
> static void deposit_prealloc_pte(struct vm_fault *vmf)
> {
> struct vm_area_struct *vma = vmf->vma;
> --
> 1.8.3.1
>
>