At present, the split counters in THP statistics no longer include
PTE-mapped mTHP. Therefore, this commit introduces per-order mTHP split
counters to monitor the frequency of mTHP splits. This will assist
developers in better analyzing and optimizing system performance.
/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
split_page
split_page_failed
deferred_split_page
Signed-off-by: Lance Yang <[email protected]>
---
include/linux/huge_mm.h | 3 +++
mm/huge_memory.c | 14 ++++++++++++--
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 56c7ea73090b..7b9c6590e1f7 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -272,6 +272,9 @@ enum mthp_stat_item {
MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
MTHP_STAT_ANON_SWPOUT,
MTHP_STAT_ANON_SWPOUT_FALLBACK,
+ MTHP_STAT_SPLIT_PAGE,
+ MTHP_STAT_SPLIT_PAGE_FAILED,
+ MTHP_STAT_DEFERRED_SPLIT_PAGE,
__MTHP_STAT_COUNT
};
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 055df5aac7c3..52db888e47a6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -557,6 +557,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(split_page, MTHP_STAT_SPLIT_PAGE);
+DEFINE_MTHP_STAT_ATTR(split_page_failed, MTHP_STAT_SPLIT_PAGE_FAILED);
+DEFINE_MTHP_STAT_ATTR(deferred_split_page, MTHP_STAT_DEFERRED_SPLIT_PAGE);
static struct attribute *stats_attrs[] = {
&anon_fault_alloc_attr.attr,
@@ -564,6 +567,9 @@ static struct attribute *stats_attrs[] = {
&anon_fault_fallback_charge_attr.attr,
&anon_swpout_attr.attr,
&anon_swpout_fallback_attr.attr,
+ &split_page_attr.attr,
+ &split_page_failed_attr.attr,
+ &deferred_split_page_attr.attr,
NULL,
};
@@ -3083,7 +3089,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
- bool is_thp = folio_test_pmd_mappable(folio);
+ int order = folio_order(folio);
int extra_pins, ret;
pgoff_t end;
bool is_hzp;
@@ -3262,8 +3268,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
i_mmap_unlock_read(mapping);
out:
xas_destroy(&xas);
- if (is_thp)
+ if (order >= HPAGE_PMD_ORDER)
count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
+ count_mthp_stat(order, !ret ? MTHP_STAT_SPLIT_PAGE :
+ MTHP_STAT_SPLIT_PAGE_FAILED);
return ret;
}
@@ -3327,6 +3335,8 @@ void deferred_split_folio(struct folio *folio)
if (list_empty(&folio->_deferred_list)) {
if (folio_test_pmd_mappable(folio))
count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+ count_mthp_stat(folio_order(folio),
+ MTHP_STAT_DEFERRED_SPLIT_PAGE);
list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG
--
2.33.1
+ Barry
On 24/04/2024 14:51, Lance Yang wrote:
> At present, the split counters in THP statistics no longer include
> PTE-mapped mTHP. Therefore, this commit introduces per-order mTHP split
> counters to monitor the frequency of mTHP splits. This will assist
> developers in better analyzing and optimizing system performance.
>
> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
> split_page
> split_page_failed
> deferred_split_page
>
> Signed-off-by: Lance Yang <[email protected]>
> ---
> include/linux/huge_mm.h | 3 +++
> mm/huge_memory.c | 14 ++++++++++++--
> 2 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 56c7ea73090b..7b9c6590e1f7 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -272,6 +272,9 @@ enum mthp_stat_item {
> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> MTHP_STAT_ANON_SWPOUT,
> MTHP_STAT_ANON_SWPOUT_FALLBACK,
> + MTHP_STAT_SPLIT_PAGE,
> + MTHP_STAT_SPLIT_PAGE_FAILED,
> + MTHP_STAT_DEFERRED_SPLIT_PAGE,
> __MTHP_STAT_COUNT
> };
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 055df5aac7c3..52db888e47a6 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -557,6 +557,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
> DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
> +DEFINE_MTHP_STAT_ATTR(split_page, MTHP_STAT_SPLIT_PAGE);
> +DEFINE_MTHP_STAT_ATTR(split_page_failed, MTHP_STAT_SPLIT_PAGE_FAILED);
> +DEFINE_MTHP_STAT_ATTR(deferred_split_page, MTHP_STAT_DEFERRED_SPLIT_PAGE);
>
> static struct attribute *stats_attrs[] = {
> &anon_fault_alloc_attr.attr,
> @@ -564,6 +567,9 @@ static struct attribute *stats_attrs[] = {
> &anon_fault_fallback_charge_attr.attr,
> &anon_swpout_attr.attr,
> &anon_swpout_fallback_attr.attr,
> + &split_page_attr.attr,
> + &split_page_failed_attr.attr,
> + &deferred_split_page_attr.attr,
> NULL,
> };
>
> @@ -3083,7 +3089,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
> struct anon_vma *anon_vma = NULL;
> struct address_space *mapping = NULL;
> - bool is_thp = folio_test_pmd_mappable(folio);
> + int order = folio_order(folio);
> int extra_pins, ret;
> pgoff_t end;
> bool is_hzp;
> @@ -3262,8 +3268,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> i_mmap_unlock_read(mapping);
> out:
> xas_destroy(&xas);
> - if (is_thp)
> + if (order >= HPAGE_PMD_ORDER)
> count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
> + count_mthp_stat(order, !ret ? MTHP_STAT_SPLIT_PAGE :
> + MTHP_STAT_SPLIT_PAGE_FAILED);
> return ret;
> }
>
> @@ -3327,6 +3335,8 @@ void deferred_split_folio(struct folio *folio)
> if (list_empty(&folio->_deferred_list)) {
> if (folio_test_pmd_mappable(folio))
> count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> + count_mthp_stat(folio_order(folio),
> + MTHP_STAT_DEFERRED_SPLIT_PAGE);
There is a very long conversation with Barry about adding a 'global "mTHP became
partially mapped in 1 or more processes" counter (inc only)', which terminates at
[1]. There is a lot of discussion there about the required semantics: the need
for "partially mapped" to cover alignment and contiguity as well as whether all
pages are mapped, and for the counter to trigger once the folio becomes partial
in at least 1 process.
MTHP_STAT_DEFERRED_SPLIT_PAGE is giving much simpler semantics, but less
information as a result. Barry, what's your view here? I'm guessing this doesn't
quite solve what you are looking for?
[1] https://lore.kernel.org/linux-mm/[email protected]/
Thanks,
Ryan
> list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
> ds_queue->split_queue_len++;
> #ifdef CONFIG_MEMCG
Hey Lance,
On 2024/4/24 21:51, Lance Yang wrote:
> At present, the split counters in THP statistics no longer include
> PTE-mapped mTHP. Therefore, this commit introduces per-order mTHP split
> counters to monitor the frequency of mTHP splits. This will assist
> developers in better analyzing and optimizing system performance.
>
> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
> split_page
> split_page_failed
> deferred_split_page
>
> Signed-off-by: Lance Yang <[email protected]>
> ---
> include/linux/huge_mm.h | 3 +++
> mm/huge_memory.c | 14 ++++++++++++--
> 2 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 56c7ea73090b..7b9c6590e1f7 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -272,6 +272,9 @@ enum mthp_stat_item {
> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> MTHP_STAT_ANON_SWPOUT,
> MTHP_STAT_ANON_SWPOUT_FALLBACK,
> + MTHP_STAT_SPLIT_PAGE,
> + MTHP_STAT_SPLIT_PAGE_FAILED,
> + MTHP_STAT_DEFERRED_SPLIT_PAGE,
> __MTHP_STAT_COUNT
> };
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 055df5aac7c3..52db888e47a6 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -557,6 +557,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
> DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
> +DEFINE_MTHP_STAT_ATTR(split_page, MTHP_STAT_SPLIT_PAGE);
> +DEFINE_MTHP_STAT_ATTR(split_page_failed, MTHP_STAT_SPLIT_PAGE_FAILED);
> +DEFINE_MTHP_STAT_ATTR(deferred_split_page, MTHP_STAT_DEFERRED_SPLIT_PAGE);
>
> static struct attribute *stats_attrs[] = {
> &anon_fault_alloc_attr.attr,
> @@ -564,6 +567,9 @@ static struct attribute *stats_attrs[] = {
> &anon_fault_fallback_charge_attr.attr,
> &anon_swpout_attr.attr,
> &anon_swpout_fallback_attr.attr,
> + &split_page_attr.attr,
> + &split_page_failed_attr.attr,
> + &deferred_split_page_attr.attr,
> NULL,
> };
>
> @@ -3083,7 +3089,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
> struct anon_vma *anon_vma = NULL;
> struct address_space *mapping = NULL;
> - bool is_thp = folio_test_pmd_mappable(folio);
> + int order = folio_order(folio);
> int extra_pins, ret;
> pgoff_t end;
> bool is_hzp;
> @@ -3262,8 +3268,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> i_mmap_unlock_read(mapping);
> out:
> xas_destroy(&xas);
> - if (is_thp)
> + if (order >= HPAGE_PMD_ORDER)
> count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
> + count_mthp_stat(order, !ret ? MTHP_STAT_SPLIT_PAGE :
> + MTHP_STAT_SPLIT_PAGE_FAILED);
> return ret;
> }
>
> @@ -3327,6 +3335,8 @@ void deferred_split_folio(struct folio *folio)
> if (list_empty(&folio->_deferred_list)) {
> if (folio_test_pmd_mappable(folio))
> count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> + count_mthp_stat(folio_order(folio),
> + MTHP_STAT_DEFERRED_SPLIT_PAGE);
> list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
> ds_queue->split_queue_len++;
> #ifdef CONFIG_MEMCG
My opinion can be ignored :). Would it be better to modify the
deferred_split_folio function as follows? I'm not sure.
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index
055df5aac7c3..e8562e8630b1 100644 --- a/mm/huge_memory.c +++
b/mm/huge_memory.c @@ -3299,12 +3299,13 @@ void
deferred_split_folio(struct folio *folio) struct mem_cgroup *memcg =
folio_memcg(folio); #endif unsigned long flags; + int order =
folio_order(folio); /* * Order 1 folios have no space for a deferred
list, but we also * won't waste much memory by not adding them to the
deferred list. */ - if (folio_order(folio) <= 1) + if (order <= 1)
return; /* @@ -3325,8 +3326,9 @@ void deferred_split_folio(struct folio
*folio) spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if
(list_empty(&folio->_deferred_list)) { - if
(folio_test_pmd_mappable(folio)) + if (order >= HPAGE_PMD_ORDER)
count_vm_event(THP_DEFERRED_SPLIT_PAGE); + count_mthp_stat(order,
MTHP_STAT_DEFERRED_SPLIT_PAGE); list_add_tail(&folio->_deferred_list,
&ds_queue->split_queue); ds_queue->split_queue_len++; #ifdef CONFIG_MEMCG thanks,
bang
Hey, sorry for making noise, there was something wrong with the format of
the last email.
On 2024/4/25 1:12, Bang Li wrote:
> Hey Lance,
>
> On 2024/4/24 21:51, Lance Yang wrote:
>
>> At present, the split counters in THP statistics no longer include
>> PTE-mapped mTHP. Therefore, this commit introduces per-order mTHP split
>> counters to monitor the frequency of mTHP splits. This will assist
>> developers in better analyzing and optimizing system performance.
>>
>> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
>> split_page
>> split_page_failed
>> deferred_split_page
>>
>> Signed-off-by: Lance Yang <[email protected]>
>> ---
>> include/linux/huge_mm.h | 3 +++
>> mm/huge_memory.c | 14 ++++++++++++--
>> 2 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 56c7ea73090b..7b9c6590e1f7 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -272,6 +272,9 @@ enum mthp_stat_item {
>> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
>> MTHP_STAT_ANON_SWPOUT,
>> MTHP_STAT_ANON_SWPOUT_FALLBACK,
>> + MTHP_STAT_SPLIT_PAGE,
>> + MTHP_STAT_SPLIT_PAGE_FAILED,
>> + MTHP_STAT_DEFERRED_SPLIT_PAGE,
>> __MTHP_STAT_COUNT
>> };
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 055df5aac7c3..52db888e47a6 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -557,6 +557,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback,
>> MTHP_STAT_ANON_FAULT_FALLBACK);
>> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge,
>> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
>> DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
>> DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback,
>> MTHP_STAT_ANON_SWPOUT_FALLBACK);
>> +DEFINE_MTHP_STAT_ATTR(split_page, MTHP_STAT_SPLIT_PAGE);
>> +DEFINE_MTHP_STAT_ATTR(split_page_failed, MTHP_STAT_SPLIT_PAGE_FAILED);
>> +DEFINE_MTHP_STAT_ATTR(deferred_split_page,
>> MTHP_STAT_DEFERRED_SPLIT_PAGE);
>> static struct attribute *stats_attrs[] = {
>> &anon_fault_alloc_attr.attr,
>> @@ -564,6 +567,9 @@ static struct attribute *stats_attrs[] = {
>> &anon_fault_fallback_charge_attr.attr,
>> &anon_swpout_attr.attr,
>> &anon_swpout_fallback_attr.attr,
>> + &split_page_attr.attr,
>> + &split_page_failed_attr.attr,
>> + &deferred_split_page_attr.attr,
>> NULL,
>> };
>> @@ -3083,7 +3089,7 @@ int split_huge_page_to_list_to_order(struct
>> page *page, struct list_head *list,
>> XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index,
>> new_order);
>> struct anon_vma *anon_vma = NULL;
>> struct address_space *mapping = NULL;
>> - bool is_thp = folio_test_pmd_mappable(folio);
>> + int order = folio_order(folio);
>> int extra_pins, ret;
>> pgoff_t end;
>> bool is_hzp;
>> @@ -3262,8 +3268,10 @@ int split_huge_page_to_list_to_order(struct
>> page *page, struct list_head *list,
>> i_mmap_unlock_read(mapping);
>> out:
>> xas_destroy(&xas);
>> - if (is_thp)
>> + if (order >= HPAGE_PMD_ORDER)
>> count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
>> + count_mthp_stat(order, !ret ? MTHP_STAT_SPLIT_PAGE :
>> + MTHP_STAT_SPLIT_PAGE_FAILED);
>> return ret;
>> }
>> @@ -3327,6 +3335,8 @@ void deferred_split_folio(struct folio *folio)
>> if (list_empty(&folio->_deferred_list)) {
>> if (folio_test_pmd_mappable(folio))
>> count_vm_event(THP_DEFERRED_SPLIT_PAGE);
>> + count_mthp_stat(folio_order(folio),
>> + MTHP_STAT_DEFERRED_SPLIT_PAGE);
>> list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
>> ds_queue->split_queue_len++;
>> #ifdef CONFIG_MEMCG
>
> My opinion can be ignored :). Would it be better to modify the
> deferred_split_folio
> function as follows? I'm not sure.
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c index
> 055df5aac7c3..e8562e8630b1 100644 --- a/mm/huge_memory.c +++
> b/mm/huge_memory.c @@ -3299,12 +3299,13 @@ void
> deferred_split_folio(struct folio *folio) struct mem_cgroup *memcg =
> folio_memcg(folio); #endif unsigned long flags; + int order =
> folio_order(folio); /* * Order 1 folios have no space for a deferred
> list, but we also * won't waste much memory by not adding them to the
> deferred list. */ - if (folio_order(folio) <= 1) + if (order <= 1)
> return; /* @@ -3325,8 +3326,9 @@ void deferred_split_folio(struct
> folio *folio) spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
> if (list_empty(&folio->_deferred_list)) { - if
> (folio_test_pmd_mappable(folio)) + if (order >= HPAGE_PMD_ORDER)
> count_vm_event(THP_DEFERRED_SPLIT_PAGE); + count_mthp_stat(order,
> MTHP_STAT_DEFERRED_SPLIT_PAGE); list_add_tail(&folio->_deferred_list,
> &ds_queue->split_queue); ds_queue->split_queue_len++; #ifdef
> CONFIG_MEMCG thanks,
> bang
>
My opinion can be ignored :). Would it be better to modify the
deferred_split_folio function as follows? I'm not sure.
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 055df5aac7c3..e8562e8630b1 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3299,12 +3299,13 @@ void deferred_split_folio(struct folio *folio)
struct mem_cgroup *memcg = folio_memcg(folio);
#endif
unsigned long flags;
+ int order = folio_order(folio);
/*
* Order 1 folios have no space for a deferred list, but we also
* won't waste much memory by not adding them to the deferred list.
*/
- if (folio_order(folio) <= 1)
+ if (order <= 1)
return;
/*
@@ -3325,8 +3326,9 @@ void deferred_split_folio(struct folio *folio)
spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
if (list_empty(&folio->_deferred_list)) {
- if (folio_test_pmd_mappable(folio))
+ if (order >= HPAGE_PMD_ORDER)
count_vm_event(THP_DEFERRED_SPLIT_PAGE);
+ count_mthp_stat(order, MTHP_STAT_DEFERRED_SPLIT_PAGE);
list_add_tail(&folio->_deferred_list,
&ds_queue->split_queue);
ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG
thanks,
bang
On Wed, Apr 24, 2024 at 6:53 AM Lance Yang <[email protected]> wrote:
>
> At present, the split counters in THP statistics no longer include
> PTE-mapped mTHP. Therefore, this commit introduces per-order mTHP split
> counters to monitor the frequency of mTHP splits. This will assist
> developers in better analyzing and optimizing system performance.
>
> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
> split_page
> split_page_failed
> deferred_split_page
The deferred_split_page counter may easily go insane with the fix from
https://lore.kernel.org/linux-mm/[email protected]/
Zi Yan,
Will you submit v2 for this patch soon?
>
> Signed-off-by: Lance Yang <[email protected]>
> ---
> include/linux/huge_mm.h | 3 +++
> mm/huge_memory.c | 14 ++++++++++++--
> 2 files changed, 15 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 56c7ea73090b..7b9c6590e1f7 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -272,6 +272,9 @@ enum mthp_stat_item {
> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> MTHP_STAT_ANON_SWPOUT,
> MTHP_STAT_ANON_SWPOUT_FALLBACK,
> + MTHP_STAT_SPLIT_PAGE,
> + MTHP_STAT_SPLIT_PAGE_FAILED,
> + MTHP_STAT_DEFERRED_SPLIT_PAGE,
> __MTHP_STAT_COUNT
> };
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 055df5aac7c3..52db888e47a6 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -557,6 +557,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
> DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
> +DEFINE_MTHP_STAT_ATTR(split_page, MTHP_STAT_SPLIT_PAGE);
> +DEFINE_MTHP_STAT_ATTR(split_page_failed, MTHP_STAT_SPLIT_PAGE_FAILED);
> +DEFINE_MTHP_STAT_ATTR(deferred_split_page, MTHP_STAT_DEFERRED_SPLIT_PAGE);
>
> static struct attribute *stats_attrs[] = {
> &anon_fault_alloc_attr.attr,
> @@ -564,6 +567,9 @@ static struct attribute *stats_attrs[] = {
> &anon_fault_fallback_charge_attr.attr,
> &anon_swpout_attr.attr,
> &anon_swpout_fallback_attr.attr,
> + &split_page_attr.attr,
> + &split_page_failed_attr.attr,
> + &deferred_split_page_attr.attr,
> NULL,
> };
>
> @@ -3083,7 +3089,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
> struct anon_vma *anon_vma = NULL;
> struct address_space *mapping = NULL;
> - bool is_thp = folio_test_pmd_mappable(folio);
> + int order = folio_order(folio);
> int extra_pins, ret;
> pgoff_t end;
> bool is_hzp;
> @@ -3262,8 +3268,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> i_mmap_unlock_read(mapping);
> out:
> xas_destroy(&xas);
> - if (is_thp)
> + if (order >= HPAGE_PMD_ORDER)
> count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
> + count_mthp_stat(order, !ret ? MTHP_STAT_SPLIT_PAGE :
> + MTHP_STAT_SPLIT_PAGE_FAILED);
> return ret;
> }
>
> @@ -3327,6 +3335,8 @@ void deferred_split_folio(struct folio *folio)
> if (list_empty(&folio->_deferred_list)) {
> if (folio_test_pmd_mappable(folio))
> count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> + count_mthp_stat(folio_order(folio),
> + MTHP_STAT_DEFERRED_SPLIT_PAGE);
> list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
> ds_queue->split_queue_len++;
> #ifdef CONFIG_MEMCG
> --
> 2.33.1
>
>
Hey Bang,
Thanks for taking the time to review!
On Thu, Apr 25, 2024 at 1:59 AM Bang Li <[email protected]> wrote:
>
> Hey, sorry for making noise, there was something wrong with the format of
> the last email.
>
> On 2024/4/25 1:12, Bang Li wrote:
> > Hey Lance,
> >
> > On 2024/4/24 21:51, Lance Yang wrote:
> >
> >> At present, the split counters in THP statistics no longer include
> >> PTE-mapped mTHP. Therefore, this commit introduces per-order mTHP split
> >> counters to monitor the frequency of mTHP splits. This will assist
> >> developers in better analyzing and optimizing system performance.
> >>
> >> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
> >> split_page
> >> split_page_failed
> >> deferred_split_page
> >>
> >> Signed-off-by: Lance Yang <[email protected]>
> >> ---
> >> include/linux/huge_mm.h | 3 +++
> >> mm/huge_memory.c | 14 ++++++++++++--
> >> 2 files changed, 15 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> >> index 56c7ea73090b..7b9c6590e1f7 100644
> >> --- a/include/linux/huge_mm.h
> >> +++ b/include/linux/huge_mm.h
> >> @@ -272,6 +272,9 @@ enum mthp_stat_item {
> >> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> >> MTHP_STAT_ANON_SWPOUT,
> >> MTHP_STAT_ANON_SWPOUT_FALLBACK,
> >> + MTHP_STAT_SPLIT_PAGE,
> >> + MTHP_STAT_SPLIT_PAGE_FAILED,
> >> + MTHP_STAT_DEFERRED_SPLIT_PAGE,
> >> __MTHP_STAT_COUNT
> >> };
> >> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> >> index 055df5aac7c3..52db888e47a6 100644
> >> --- a/mm/huge_memory.c
> >> +++ b/mm/huge_memory.c
> >> @@ -557,6 +557,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback,
> >> MTHP_STAT_ANON_FAULT_FALLBACK);
> >> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge,
> >> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> >> DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
> >> DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback,
> >> MTHP_STAT_ANON_SWPOUT_FALLBACK);
> >> +DEFINE_MTHP_STAT_ATTR(split_page, MTHP_STAT_SPLIT_PAGE);
> >> +DEFINE_MTHP_STAT_ATTR(split_page_failed, MTHP_STAT_SPLIT_PAGE_FAILED);
> >> +DEFINE_MTHP_STAT_ATTR(deferred_split_page,
> >> MTHP_STAT_DEFERRED_SPLIT_PAGE);
> >> static struct attribute *stats_attrs[] = {
> >> &anon_fault_alloc_attr.attr,
> >> @@ -564,6 +567,9 @@ static struct attribute *stats_attrs[] = {
> >> &anon_fault_fallback_charge_attr.attr,
> >> &anon_swpout_attr.attr,
> >> &anon_swpout_fallback_attr.attr,
> >> + &split_page_attr.attr,
> >> + &split_page_failed_attr.attr,
> >> + &deferred_split_page_attr.attr,
> >> NULL,
> >> };
> >> @@ -3083,7 +3089,7 @@ int split_huge_page_to_list_to_order(struct
> >> page *page, struct list_head *list,
> >> XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index,
> >> new_order);
> >> struct anon_vma *anon_vma = NULL;
> >> struct address_space *mapping = NULL;
> >> - bool is_thp = folio_test_pmd_mappable(folio);
> >> + int order = folio_order(folio);
> >> int extra_pins, ret;
> >> pgoff_t end;
> >> bool is_hzp;
> >> @@ -3262,8 +3268,10 @@ int split_huge_page_to_list_to_order(struct
> >> page *page, struct list_head *list,
> >> i_mmap_unlock_read(mapping);
> >> out:
> >> xas_destroy(&xas);
> >> - if (is_thp)
> >> + if (order >= HPAGE_PMD_ORDER)
> >> count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
> >> + count_mthp_stat(order, !ret ? MTHP_STAT_SPLIT_PAGE :
> >> + MTHP_STAT_SPLIT_PAGE_FAILED);
> >> return ret;
> >> }
> >> @@ -3327,6 +3335,8 @@ void deferred_split_folio(struct folio *folio)
> >> if (list_empty(&folio->_deferred_list)) {
> >> if (folio_test_pmd_mappable(folio))
> >> count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> >> + count_mthp_stat(folio_order(folio),
> >> + MTHP_STAT_DEFERRED_SPLIT_PAGE);
> >> list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
> >> ds_queue->split_queue_len++;
> >> #ifdef CONFIG_MEMCG
> >
> > My opinion can be ignored :). Would it be better to modify the
> > deferred_split_folio
> > function as follows? I'm not sure.
> >
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c index
> > 055df5aac7c3..e8562e8630b1 100644 --- a/mm/huge_memory.c +++
> > b/mm/huge_memory.c @@ -3299,12 +3299,13 @@ void
> > deferred_split_folio(struct folio *folio) struct mem_cgroup *memcg =
> > folio_memcg(folio); #endif unsigned long flags; + int order =
> > folio_order(folio); /* * Order 1 folios have no space for a deferred
> > list, but we also * won't waste much memory by not adding them to the
> > deferred list. */ - if (folio_order(folio) <= 1) + if (order <= 1)
> > return; /* @@ -3325,8 +3326,9 @@ void deferred_split_folio(struct
> > folio *folio) spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
> > if (list_empty(&folio->_deferred_list)) { - if
> > (folio_test_pmd_mappable(folio)) + if (order >= HPAGE_PMD_ORDER)
> > count_vm_event(THP_DEFERRED_SPLIT_PAGE); + count_mthp_stat(order,
> > MTHP_STAT_DEFERRED_SPLIT_PAGE); list_add_tail(&folio->_deferred_list,
> > &ds_queue->split_queue); ds_queue->split_queue_len++; #ifdef
> > CONFIG_MEMCG thanks,
> > bang
> >
>
> My opinion can be ignored :). Would it be better to modify the
> deferred_split_folio
> function as follows? I'm not sure.
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 055df5aac7c3..e8562e8630b1 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3299,12 +3299,13 @@ void deferred_split_folio(struct folio *folio)
> struct mem_cgroup *memcg = folio_memcg(folio);
> #endif
> unsigned long flags;
> + int order = folio_order(folio);
I'll consider storing it in a variable earlier for later reuse.
Thanks,
Lance
>
> /*
> * Order 1 folios have no space for a deferred list, but we also
> * won't waste much memory by not adding them to the deferred list.
> */
> - if (folio_order(folio) <= 1)
> + if (order <= 1)
> return;
>
> /*
> @@ -3325,8 +3326,9 @@ void deferred_split_folio(struct folio *folio)
>
> spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
> if (list_empty(&folio->_deferred_list)) {
> - if (folio_test_pmd_mappable(folio))
> + if (order >= HPAGE_PMD_ORDER)
> count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> + count_mthp_stat(order, MTHP_STAT_DEFERRED_SPLIT_PAGE);
> list_add_tail(&folio->_deferred_list,
> &ds_queue->split_queue);
> ds_queue->split_queue_len++;
> #ifdef CONFIG_MEMCG
>
> thanks,
> bang
Hey Yang,
Thanks for taking the time to review!
On Thu, Apr 25, 2024 at 3:44 AM Yang Shi <[email protected]> wrote:
>
> On Wed, Apr 24, 2024 at 6:53 AM Lance Yang <[email protected]> wrote:
> >
> > At present, the split counters in THP statistics no longer include
> > PTE-mapped mTHP. Therefore, this commit introduces per-order mTHP split
> > counters to monitor the frequency of mTHP splits. This will assist
> > developers in better analyzing and optimizing system performance.
> >
> > /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
> > split_page
> > split_page_failed
> > deferred_split_page
>
> The deferred_split_page counter may easily go insane with the fix from
> https://lore.kernel.org/linux-mm/[email protected]/
>
> Zi Yan,
>
> Will you submit v2 for this patch soon?
Thanks for bringing this to my attention!
I'll follow Zi Yan's patch, then submit the next version.
Thanks,
Lance
>
>
> >
> > Signed-off-by: Lance Yang <[email protected]>
> > ---
> > include/linux/huge_mm.h | 3 +++
> > mm/huge_memory.c | 14 ++++++++++++--
> > 2 files changed, 15 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > index 56c7ea73090b..7b9c6590e1f7 100644
> > --- a/include/linux/huge_mm.h
> > +++ b/include/linux/huge_mm.h
> > @@ -272,6 +272,9 @@ enum mthp_stat_item {
> > MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> > MTHP_STAT_ANON_SWPOUT,
> > MTHP_STAT_ANON_SWPOUT_FALLBACK,
> > + MTHP_STAT_SPLIT_PAGE,
> > + MTHP_STAT_SPLIT_PAGE_FAILED,
> > + MTHP_STAT_DEFERRED_SPLIT_PAGE,
> > __MTHP_STAT_COUNT
> > };
> >
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 055df5aac7c3..52db888e47a6 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -557,6 +557,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> > DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> > DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
> > DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
> > +DEFINE_MTHP_STAT_ATTR(split_page, MTHP_STAT_SPLIT_PAGE);
> > +DEFINE_MTHP_STAT_ATTR(split_page_failed, MTHP_STAT_SPLIT_PAGE_FAILED);
> > +DEFINE_MTHP_STAT_ATTR(deferred_split_page, MTHP_STAT_DEFERRED_SPLIT_PAGE);
> >
> > static struct attribute *stats_attrs[] = {
> > &anon_fault_alloc_attr.attr,
> > @@ -564,6 +567,9 @@ static struct attribute *stats_attrs[] = {
> > &anon_fault_fallback_charge_attr.attr,
> > &anon_swpout_attr.attr,
> > &anon_swpout_fallback_attr.attr,
> > + &split_page_attr.attr,
> > + &split_page_failed_attr.attr,
> > + &deferred_split_page_attr.attr,
> > NULL,
> > };
> >
> > @@ -3083,7 +3089,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> > XA_STATE_ORDER(xas, &folio->mapping->i_pages, folio->index, new_order);
> > struct anon_vma *anon_vma = NULL;
> > struct address_space *mapping = NULL;
> > - bool is_thp = folio_test_pmd_mappable(folio);
> > + int order = folio_order(folio);
> > int extra_pins, ret;
> > pgoff_t end;
> > bool is_hzp;
> > @@ -3262,8 +3268,10 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> > i_mmap_unlock_read(mapping);
> > out:
> > xas_destroy(&xas);
> > - if (is_thp)
> > + if (order >= HPAGE_PMD_ORDER)
> > count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
> > + count_mthp_stat(order, !ret ? MTHP_STAT_SPLIT_PAGE :
> > + MTHP_STAT_SPLIT_PAGE_FAILED);
> > return ret;
> > }
> >
> > @@ -3327,6 +3335,8 @@ void deferred_split_folio(struct folio *folio)
> > if (list_empty(&folio->_deferred_list)) {
> > if (folio_test_pmd_mappable(folio))
> > count_vm_event(THP_DEFERRED_SPLIT_PAGE);
> > + count_mthp_stat(folio_order(folio),
> > + MTHP_STAT_DEFERRED_SPLIT_PAGE);
> > list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
> > ds_queue->split_queue_len++;
> > #ifdef CONFIG_MEMCG
> > --
> > 2.33.1
> >
> >