by Baolin Wang

[permalink] [raw]

Subject: Re: [PATCH v2 5/7] mm: shmem: add mTHP support for anonymous shmem

On 2024/5/14 21:36, Lance Yang wrote:
> Hi Baolin,
>
> On Mon, May 13, 2024 at 1:08 PM Baolin Wang
> <[email protected]> wrote:
>>
>> Commit 19eaf44954df adds multi-size THP (mTHP) for anonymous pages, that
>> can allow THP to be configured through the sysfs interface located at
>> '/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/enabled'.
>>
>> However, anonymous shared pages ignore the anonymous mTHP rule
>> configured through the sysfs interface and can only use PMD-mapped
>> THP, which is not reasonable. Users expect the mTHP rule to apply to
>> all anonymous pages, including anonymous shared pages, in order to
>> enjoy the benefits of mTHP: for example, lower latency than PMD-mapped THP,
>> smaller memory bloat than PMD-mapped THP, and contiguous PTEs on ARM
>> architectures to reduce TLB misses, etc.
>>
>> The primary strategy is similar to supporting anonymous mTHP. Introduce
>> a new interface '/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/shmem_enabled',
>> which can have all the same values as the top-level
>> '/sys/kernel/mm/transparent_hugepage/shmem_enabled', plus a new
>> additional "inherit" option. By default all sizes will be set to "never"
>> except PMD size, which is set to "inherit". This ensures backward compatibility
>> with the top-level anonymous shmem setting, while also allowing
>> independent control of anonymous shmem for each mTHP size.
>>
>> Signed-off-by: Baolin Wang <[email protected]>
>> ---
>> include/linux/huge_mm.h | 10 +++
>> mm/shmem.c | 179 +++++++++++++++++++++++++++++++++-------
>> 2 files changed, 161 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 1fce6fee7766..b5339210268d 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -583,6 +583,16 @@ static inline bool thp_migration_supported(void)
>> {
>> return false;
>> }
>> +
>> +static inline int highest_order(unsigned long orders)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline int next_order(unsigned long *orders, int prev)
>> +{
>> + return 0;
>> +}
>> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>>
>> static inline int split_folio_to_list_to_order(struct folio *folio,
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 59cc26d44344..b50ddf013e37 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1611,6 +1611,106 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>> return result;
>> }
>>
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
>> + struct vm_area_struct *vma, pgoff_t index,
>> + bool global_huge)
>> +{
>> + unsigned long mask = READ_ONCE(huge_anon_shmem_orders_always);
>> + unsigned long within_size_orders = READ_ONCE(huge_anon_shmem_orders_within_size);
>> + unsigned long vm_flags = vma->vm_flags;
>> + /*
>> + * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that
>> + * are enabled for this vma.
>> + */
>> + unsigned long orders = BIT(PMD_ORDER + 1) - 1;
>> + loff_t i_size;
>> + int order;
>> +
>> + if ((vm_flags & VM_NOHUGEPAGE) ||
>> + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>> + return 0;
>> +
>> + /* If the hardware/firmware marked hugepage support disabled. */
>> + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
>> + return 0;
>> +
>> + /*
>> + * Following the 'deny' semantics of the top level, force the huge
>> + * option off from all mounts.
>> + */
>> + if (shmem_huge == SHMEM_HUGE_DENY)
>> + return 0;
>> + /*
>> + * Only allow inherit orders if the top-level value is 'force', which
>> + * means non-PMD sized THP can not override 'huge' mount option now.
>> + */
>> + if (shmem_huge == SHMEM_HUGE_FORCE)
>> + return READ_ONCE(huge_anon_shmem_orders_inherit);
>> +
>> + /* Allow mTHP that will be fully within i_size. */
>> + order = highest_order(within_size_orders);
>> + while (within_size_orders) {
>> + index = round_up(index + 1, order);
>> + i_size = round_up(i_size_read(inode), PAGE_SIZE);
>> + if (i_size >> PAGE_SHIFT >= index) {
>> + mask |= within_size_orders;
>> + break;
>> + }
>> +
>> + order = next_order(&within_size_orders, order);
>> + }
>> +
>> + if (vm_flags & VM_HUGEPAGE)
>> + mask |= READ_ONCE(huge_anon_shmem_orders_madvise);
>> +
>> + if (global_huge)
>> + mask |= READ_ONCE(huge_anon_shmem_orders_inherit);
>> +
>> + return orders & mask;
>> +}
>> +
>> +static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
>> + struct address_space *mapping, pgoff_t index,
>> + unsigned long orders)
>> +{
>> + struct vm_area_struct *vma = vmf->vma;
>> + unsigned long pages;
>> + int order;
>> +
>> + orders = thp_vma_suitable_orders(vma, vmf->address, orders);
>> + if (!orders)
>> + return 0;
>> +
>> + /* Find the highest order that can add into the page cache */
>> + order = highest_order(orders);
>> + while (orders) {
>> + pages = 1UL << order;
>> + index = round_down(index, pages);
>> + if (!xa_find(&mapping->i_pages, &index,
>> + index + pages - 1, XA_PRESENT))
>> + break;
>> + order = next_order(&orders, order);
>> + }
>> +
>> + return orders;
>> +}
>> +#else
>> +static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
>> + struct vm_area_struct *vma, pgoff_t index,
>> + bool global_huge)
>> +{
>> + return 0;
>> +}
>> +
>> +static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
>> + struct address_space *mapping, pgoff_t index,
>> + unsigned long orders)
>> +{
>> + return 0;
>> +}
>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +
>> static struct folio *shmem_alloc_hugefolio(gfp_t gfp,
>> struct shmem_inode_info *info, pgoff_t index, int order)
>> {
>> @@ -1639,38 +1739,55 @@ static struct folio *shmem_alloc_folio(gfp_t gfp,
>> return (struct folio *)page;
>> }
>>
>> -static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
>> - struct inode *inode, pgoff_t index,
>> - struct mm_struct *fault_mm, bool huge)
>> +static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>> + gfp_t gfp, struct inode *inode, pgoff_t index,
>> + struct mm_struct *fault_mm, bool huge, unsigned long orders)
>
> IMO, it might be cleaner to drop the huge parameter and just set 'orders' as
> BIT(HPAGE_PMD_ORDER), then we only do the 'orders' check :)
>
> Likely:
>
> if (orders > 0) {
> if (vma && vma_is_anon_shmem(vma)) {
> ...
> } else if (orders & BIT(HPAGE_PMD_ORDER)) {
> ...
> }
> }

Yes, looks better.

>> {
>> struct address_space *mapping = inode->i_mapping;
>> struct shmem_inode_info *info = SHMEM_I(inode);
>> - struct folio *folio;
>> + struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
>> + unsigned long suitable_orders;
>> + struct folio *folio = NULL;
>> long pages;
>> - int error;
>> + int error, order;
>>
>> if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
>> huge = false;
>
> Currently, if THP is disabled, 'huge' will fall back to order-0, but 'orders'
> does not, IIUC. How about we make both consistent if THP is disabled?
>
> if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
> huge = false;
> orders = 0;
> }

If THP is disabled, the 'orders' must be 0, so no need to reset it.

2024-05-15 03:14:30

by Baolin Wang

[permalink] [raw]

Subject: Re: [PATCH v2 7/7] mm: shmem: add mTHP counters for anonymous shmem

On 2024/5/14 22:49, Lance Yang wrote:
> Hi Baolin,
>
> On Mon, May 13, 2024 at 1:08 PM Baolin Wang
> <[email protected]> wrote:
>>
>> Add mTHP counters for anonymous shmem.
>>
>> Signed-off-by: Baolin Wang <[email protected]>
>> ---
>> include/linux/huge_mm.h | 3 +++
>> mm/huge_memory.c | 6 ++++++
>> mm/shmem.c | 18 +++++++++++++++---
>> 3 files changed, 24 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index b5339210268d..e162498fef82 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -281,6 +281,9 @@ enum mthp_stat_item {
>> MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
>> MTHP_STAT_ANON_SWPOUT,
>> MTHP_STAT_ANON_SWPOUT_FALLBACK,
>> + MTHP_STAT_FILE_ALLOC,
>> + MTHP_STAT_FILE_FALLBACK,
>> + MTHP_STAT_FILE_FALLBACK_CHARGE,
>> __MTHP_STAT_COUNT
>> };
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index d3080a8843f2..fcda6ae604f6 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -555,6 +555,9 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
>> DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
>> DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
>> DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
>> +DEFINE_MTHP_STAT_ATTR(file_alloc, MTHP_STAT_FILE_ALLOC);
>> +DEFINE_MTHP_STAT_ATTR(file_fallback, MTHP_STAT_FILE_FALLBACK);
>> +DEFINE_MTHP_STAT_ATTR(file_fallback_charge, MTHP_STAT_FILE_FALLBACK_CHARGE);
>>
>> static struct attribute *stats_attrs[] = {
>> &anon_fault_alloc_attr.attr,
>> @@ -562,6 +565,9 @@ static struct attribute *stats_attrs[] = {
>> &anon_fault_fallback_charge_attr.attr,
>> &anon_swpout_attr.attr,
>> &anon_swpout_fallback_attr.attr,
>> + &file_alloc_attr.attr,
>> + &file_fallback_attr.attr,
>> + &file_fallback_charge_attr.attr,
>> NULL,
>> };
>>
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 8b020ff09c72..fd2cb2e73a21 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1786,6 +1786,9 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>>
>> if (pages == HPAGE_PMD_NR)
>> count_vm_event(THP_FILE_FALLBACK);
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> + count_mthp_stat(order, MTHP_STAT_FILE_FALLBACK);
>> +#endif
>
> Seems like we don't need these conditional compilation directives here.
>
> The THP_FILE_FALLBACK above will already result in a compilation error if
> CONFIG_TRANSPARENT_HUGEPAGE is not defined, so we don't need to
> worry about that :)
>
> See THP_FILE_FALLBACK in include/linux/vm_event_item.h.
>
>> order = next_order(&suitable_orders, order);
>> }
>> } else {
>> @@ -1805,9 +1808,15 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>> if (xa_find(&mapping->i_pages, &index,
>> index + pages - 1, XA_PRESENT)) {
>> error = -EEXIST;
>> - } else if (pages == HPAGE_PMD_NR) {
>> - count_vm_event(THP_FILE_FALLBACK);
>> - count_vm_event(THP_FILE_FALLBACK_CHARGE);
>> + } else if (pages > 1) {
>> + if (pages == HPAGE_PMD_NR) {
>> + count_vm_event(THP_FILE_FALLBACK);
>> + count_vm_event(THP_FILE_FALLBACK_CHARGE);
>> + }
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> + count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK);
>> + count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_FALLBACK_CHARGE);
>> +#endif
>
> As above.
>
>> }
>> goto unlock;
>> }
>> @@ -2178,6 +2187,9 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>> if (!IS_ERR(folio)) {
>> if (folio_test_pmd_mappable(folio))
>> count_vm_event(THP_FILE_ALLOC);
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> + count_mthp_stat(folio_order(folio), MTHP_STAT_FILE_ALLOC);
>> +#endif
>
> As above.
>
> Perhaps we need to define MTHP_STAT_FILE_ALLOC and friends
> in the same way as THP_FILE_ALLOC, i.e. as '({ BUILD_BUG(); 0; })'
> if CONFIG_TRANSPARENT_HUGEPAGE is not defined.
>
> Likely:
>
> #ifndef CONFIG_TRANSPARENT_HUGEPAGE
> #define MTHP_STAT_FILE_ALLOC ({ BUILD_BUG(); 0; })
> ...
> #endif

This is not enough on its own: we would also need to define a dummy
count_mthp_stat() function for when CONFIG_TRANSPARENT_HUGEPAGE is not
enabled. I was hesitant about doing that earlier, and adding the #ifdef
guards seemed relatively simple in comparison :)

Thanks for reviewing.