On systems without CMA or (MEMORY_ISOLATION && COMPACTION) activated but
that support gigantic pages, boot-time reserved gigantic pages cannot be
freed at all. This patch simply makes it possible to hand those pages back
to the memory allocator.
This patch also renames:
- the triplet CMA or (MEMORY_ISOLATION && COMPACTION) into CONTIG_ALLOC,
and gets rid of all use of it in architecture specific code (and then
removes ARCH_HAS_GIGANTIC_PAGE config).
- gigantic_page_supported into gigantic_page_runtime_allocation_supported,
to make it more accurate: this value being false does not mean that the
system cannot use gigantic pages, it just means that runtime allocation of
gigantic pages is not supported; one can still allocate boot-time gigantic
pages if the architecture supports it.
Signed-off-by: Alexandre Ghiti <[email protected]>
---
Changes in v3 as suggested by Vlastimil Babka and Dave Hansen:
- config definition was wrong and is now in mm/Kconfig
- COMPACTION_CORE was renamed to CONTIG_ALLOC
Changes in v2 as suggested by Vlastimil Babka:
- Get rid of ARCH_HAS_GIGANTIC_PAGE
- Get rid of architecture specific gigantic_page_supported
- Factorize CMA or (MEMORY_ISOLATION && COMPACTION) into COMPACTION_CORE
Compiles on all arches and validated on riscv
arch/arm64/Kconfig | 1 -
arch/arm64/include/asm/hugetlb.h | 4 --
arch/powerpc/include/asm/book3s/64/hugetlb.h | 7 ----
arch/powerpc/platforms/Kconfig.cputype | 1 -
arch/s390/Kconfig | 1 -
arch/s390/include/asm/hugetlb.h | 3 --
arch/x86/Kconfig | 1 -
arch/x86/include/asm/hugetlb.h | 4 --
arch/x86/mm/hugetlbpage.c | 2 +-
fs/Kconfig | 3 --
include/linux/gfp.h | 4 +-
mm/Kconfig | 5 +++
mm/hugetlb.c | 44 +++++++++++---------
mm/page_alloc.c | 7 ++--
14 files changed, 35 insertions(+), 52 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a4168d366127..6c778046b9f7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -18,7 +18,6 @@ config ARM64
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index fb6609875455..59893e766824 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -65,8 +65,4 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
#include <asm-generic/hugetlb.h>
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static inline bool gigantic_page_supported(void) { return true; }
-#endif
-
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 5b0177733994..d04a0bcc2f1c 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -32,13 +32,6 @@ static inline int hstate_get_psize(struct hstate *hstate)
}
}
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static inline bool gigantic_page_supported(void)
-{
- return true;
-}
-#endif
-
/* hugepd entry valid bit */
#define HUGEPD_VAL_BITS (0x8000000000000000UL)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 8c7464c3f27f..3e629dfb5efa 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -319,7 +319,6 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
- select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ed554b09eb3f..556860f290e9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -69,7 +69,6 @@ config S390
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_MEMORY
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 2d1afa58a4b6..bd191560efcf 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -116,7 +116,4 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
return pte_modify(pte, newprot);
}
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static inline bool gigantic_page_supported(void) { return true; }
-#endif
#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 68261430fe6e..2fd983e2b2f6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,7 +23,6 @@ config X86_64
def_bool y
depends on 64BIT
# Options that are inherently 64-bit kernel only:
- select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_SUPPORTS_INT128
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_SOFT_DIRTY
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 7469d321f072..f65cfb48cfdd 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -17,8 +17,4 @@ static inline void arch_clear_hugepage_flags(struct page *page)
{
}
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static inline bool gigantic_page_supported(void) { return true; }
-#endif
-
#endif /* _ASM_X86_HUGETLB_H */
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 92e4c4b85bba..fab095362c50 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -203,7 +203,7 @@ static __init int setup_hugepagesz(char *opt)
}
__setup("hugepagesz=", setup_hugepagesz);
-#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
+#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void)
{
/* With compaction or CMA we can allocate gigantic pages at runtime */
diff --git a/fs/Kconfig b/fs/Kconfig
index ac474a61be37..e76ebc71af7b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -207,9 +207,6 @@ config HUGETLB_PAGE
config MEMFD_CREATE
def_bool TMPFS || HUGETLBFS
-config ARCH_HAS_GIGANTIC_PAGE
- bool
-
source "fs/configfs/Kconfig"
source "fs/efivarfs/Kconfig"
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5f5e25fd6149..58ea44bf75de 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -585,12 +585,12 @@ static inline bool pm_suspended_storage(void)
}
#endif /* CONFIG_PM_SLEEP */
-#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
+#ifdef CONFIG_CONTIG_ALLOC
/* The below functions must be run on a range from a single zone. */
extern int alloc_contig_range(unsigned long start, unsigned long end,
unsigned migratetype, gfp_t gfp_mask);
-extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
#endif
+extern void free_contig_range(unsigned long pfn, unsigned int nr_pages);
#ifdef CONFIG_CMA
/* CMA stuff */
diff --git a/mm/Kconfig b/mm/Kconfig
index 25c71eb8a7db..138a8df9b813 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -252,12 +252,17 @@ config MIGRATION
pages as migration can relocate pages to satisfy a huge page
allocation instead of reclaiming.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
bool
config ARCH_ENABLE_THP_MIGRATION
bool
+config CONTIG_ALLOC
+ def_bool y
+ depends on (MEMORY_ISOLATION && COMPACTION) || CMA
+
config PHYS_ADDR_T_64BIT
def_bool 64BIT
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index afef61656c1e..e686c92212e9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1035,7 +1035,6 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
((node = hstate_next_node_to_free(hs, mask)) || 1); \
nr_nodes--)
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
static void destroy_compound_gigantic_page(struct page *page,
unsigned int order)
{
@@ -1058,6 +1057,12 @@ static void free_gigantic_page(struct page *page, unsigned int order)
free_contig_range(page_to_pfn(page), 1 << order);
}
+static inline bool gigantic_page_runtime_allocation_supported(void)
+{
+ return IS_ENABLED(CONFIG_CONTIG_ALLOC);
+}
+
+#ifdef CONFIG_CONTIG_ALLOC
static int __alloc_gigantic_page(unsigned long start_pfn,
unsigned long nr_pages, gfp_t gfp_mask)
{
@@ -1143,22 +1148,15 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
static void prep_compound_gigantic_page(struct page *page, unsigned int order);
-#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static inline bool gigantic_page_supported(void) { return false; }
+#else /* !CONFIG_CONTIG_ALLOC */
static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nodemask) { return NULL; }
-static inline void free_gigantic_page(struct page *page, unsigned int order) { }
-static inline void destroy_compound_gigantic_page(struct page *page,
- unsigned int order) { }
#endif
static void update_and_free_page(struct hstate *h, struct page *page)
{
int i;
- if (hstate_is_gigantic(h) && !gigantic_page_supported())
- return;
-
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -2276,13 +2274,20 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
}
#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
+static int set_max_huge_pages(struct hstate *h, unsigned long count,
nodemask_t *nodes_allowed)
{
unsigned long min_count, ret;
- if (hstate_is_gigantic(h) && !gigantic_page_supported())
- return h->max_huge_pages;
+ if (hstate_is_gigantic(h) &&
+ !gigantic_page_runtime_allocation_supported()) {
+ spin_lock(&hugetlb_lock);
+ if (count > persistent_huge_pages(h)) {
+ spin_unlock(&hugetlb_lock);
+ return -EINVAL;
+ }
+ goto decrease_pool;
+ }
/*
* Increase the pool size
@@ -2322,6 +2327,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
goto out;
}
+decrease_pool:
/*
* Decrease the pool size
* First return free pages to the buddy allocator (being careful
@@ -2350,9 +2356,10 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
break;
}
out:
- ret = persistent_huge_pages(h);
+ h->max_huge_pages = persistent_huge_pages(h);
spin_unlock(&hugetlb_lock);
- return ret;
+
+ return 0;
}
#define HSTATE_ATTR_RO(_name) \
@@ -2404,11 +2411,6 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
int err;
NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
- if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
- err = -EINVAL;
- goto out;
- }
-
if (nid == NUMA_NO_NODE) {
/*
* global hstate attribute
@@ -2428,7 +2430,9 @@ static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
} else
nodes_allowed = &node_states[N_MEMORY];
- h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
+ err = set_max_huge_pages(h, count, nodes_allowed);
+ if (err)
+ goto out;
if (nodes_allowed != &node_states[N_MEMORY])
NODEMASK_FREE(nodes_allowed);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 35fdde041f5c..8ce96c59e446 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8024,8 +8024,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
return true;
}
-#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
-
+#ifdef CONFIG_CONTIG_ALLOC
static unsigned long pfn_max_align_down(unsigned long pfn)
{
return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES,
@@ -8235,8 +8234,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
pfn_max_align_up(end), migratetype);
return ret;
}
+#endif
-void free_contig_range(unsigned long pfn, unsigned nr_pages)
+void free_contig_range(unsigned long pfn, unsigned int nr_pages)
{
unsigned int count = 0;
@@ -8248,7 +8248,6 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
}
WARN(count != 0, "%d pages are still in use!\n", count);
}
-#endif
#ifdef CONFIG_MEMORY_HOTPLUG
/*
--
2.20.1
On 2/14/19 8:31 PM, Alexandre Ghiti wrote:
> On systems without CMA or (MEMORY_ISOLATION && COMPACTION) activated but
> that support gigantic pages, boottime reserved gigantic pages can not be
> freed at all. This patch simply enables the possibility to hand back
> those pages to memory allocator.
>
> This patch also renames:
>
> - the triplet CMA or (MEMORY_ISOLATION && COMPACTION) into CONTIG_ALLOC,
> and gets rid of all use of it in architecture specific code (and then
> removes ARCH_HAS_GIGANTIC_PAGE config).
> - gigantic_page_supported to make it more accurate: this value being false
> does not mean that the system cannot use gigantic pages, it just means that
> runtime allocation of gigantic pages is not supported, one can still
> allocate boottime gigantic pages if the architecture supports it.
>
> Signed-off-by: Alexandre Ghiti <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Thanks!
...
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -252,12 +252,17 @@ config MIGRATION
> pages as migration can relocate pages to satisfy a huge page
> allocation instead of reclaiming.
>
> +
Stray newline? No need to resend, Andrew can fix up.
Ah, he wasn't in To:, adding.
> config ARCH_ENABLE_HUGEPAGE_MIGRATION
> bool
>
> config ARCH_ENABLE_THP_MIGRATION
> bool
>
> +config CONTIG_ALLOC
> + def_bool y
> + depends on (MEMORY_ISOLATION && COMPACTION) || CMA
> +
> config PHYS_ADDR_T_64BIT
> def_bool 64BIT
>
> -#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
> +#ifdef CONFIG_CONTIG_ALLOC
> /* The below functions must be run on a range from a single zone. */
> extern int alloc_contig_range(unsigned long start, unsigned long end,
> unsigned migratetype, gfp_t gfp_mask);
> -extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
> #endif
> +extern void free_contig_range(unsigned long pfn, unsigned int nr_pages);
There's a lot of stuff going on in this patch. Adding/removing config
options. Please get rid of these superfluous changes or at least break
them out.
> #ifdef CONFIG_CMA
> /* CMA stuff */
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 25c71eb8a7db..138a8df9b813 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -252,12 +252,17 @@ config MIGRATION
> pages as migration can relocate pages to satisfy a huge page
> allocation instead of reclaiming.
>
> +
> config ARCH_ENABLE_HUGEPAGE_MIGRATION
> bool
Like this. :)
> config ARCH_ENABLE_THP_MIGRATION
> bool
>
> +config CONTIG_ALLOC
> + def_bool y
> + depends on (MEMORY_ISOLATION && COMPACTION) || CMA
> +
> config PHYS_ADDR_T_64BIT
> def_bool 64BIT
Please think carefully through the Kconfig dependencies. 'select' is
*not* the same as 'depends on'.
This replaces a bunch of arch-specific "select ARCH_HAS_GIGANTIC_PAGE"
with a 'depends on'. I *think* that ends up being OK, but it absolutely
needs to be addressed in the changelog about why *you* think it is OK
and why it doesn't change the functionality of any of the patched
architectures.
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index afef61656c1e..e686c92212e9 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1035,7 +1035,6 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
> ((node = hstate_next_node_to_free(hs, mask)) || 1); \
> nr_nodes--)
>
> -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
> static void destroy_compound_gigantic_page(struct page *page,
> unsigned int order)
> {
What's the result of this #ifdef removal? A universally larger kernel
even for architectures that do not support runtime gigantic page
alloc/free? That doesn't seem like a good thing.
> @@ -1058,6 +1057,12 @@ static void free_gigantic_page(struct page *page, unsigned int order)
> free_contig_range(page_to_pfn(page), 1 << order);
> }
>
> +static inline bool gigantic_page_runtime_allocation_supported(void)
> +{
> + return IS_ENABLED(CONFIG_CONTIG_ALLOC);
> +}
Why bother having this function? Why don't the callers just check the
config option directly?
> +#ifdef CONFIG_CONTIG_ALLOC
> static int __alloc_gigantic_page(unsigned long start_pfn,
> unsigned long nr_pages, gfp_t gfp_mask)
> {
> @@ -1143,22 +1148,15 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
> static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
> static void prep_compound_gigantic_page(struct page *page, unsigned int order);
>
> -#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
> -static inline bool gigantic_page_supported(void) { return false; }
> +#else /* !CONFIG_CONTIG_ALLOC */
> static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
> int nid, nodemask_t *nodemask) { return NULL; }
> -static inline void free_gigantic_page(struct page *page, unsigned int order) { }
> -static inline void destroy_compound_gigantic_page(struct page *page,
> - unsigned int order) { }
> #endif
>
> static void update_and_free_page(struct hstate *h, struct page *page)
> {
> int i;
>
> - if (hstate_is_gigantic(h) && !gigantic_page_supported())
> - return;
I don't get the point of removing this check. Logically, this reads as
checking if the architecture supports gigantic hstates and has nothing
to do with allocation.
> h->nr_huge_pages--;
> h->nr_huge_pages_node[page_to_nid(page)]--;
> for (i = 0; i < pages_per_huge_page(h); i++) {
> @@ -2276,13 +2274,20 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
> }
>
> #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
> -static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
> +static int set_max_huge_pages(struct hstate *h, unsigned long count,
> nodemask_t *nodes_allowed)
> {
> unsigned long min_count, ret;
>
> - if (hstate_is_gigantic(h) && !gigantic_page_supported())
> - return h->max_huge_pages;
> + if (hstate_is_gigantic(h) &&
> + !gigantic_page_runtime_allocation_supported()) {
The indentation here is wrong and reduces readability. Needs to be like
this:
	if (hstate_is_gigantic(h) &&
	    !gigantic_page_runtime_allocation_supported()) {
> + spin_lock(&hugetlb_lock);
> + if (count > persistent_huge_pages(h)) {
> + spin_unlock(&hugetlb_lock);
> + return -EINVAL;
> + }
> + goto decrease_pool;
> + }
Needs comments.
/* Gigantic pages can be freed but not allocated */
or something.
On 2/15/19 12:34 PM, Dave Hansen wrote:
>> -#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
>> +#ifdef CONFIG_CONTIG_ALLOC
>> /* The below functions must be run on a range from a single zone. */
>> extern int alloc_contig_range(unsigned long start, unsigned long end,
>> unsigned migratetype, gfp_t gfp_mask);
>> -extern void free_contig_range(unsigned long pfn, unsigned nr_pages);
>> #endif
>> +extern void free_contig_range(unsigned long pfn, unsigned int nr_pages);
> There's a lot of stuff going on in this patch. Adding/removing config
> options. Please get rid of these superfluous changes or at least break
> them out.
I agree that this patch does a lot of things. I am going to split it into
at least 2 separate patches: one, suggested by Vlastimil, for the renaming
of (MEMORY_ISOLATION && COMPACTION) || CMA, and another that does what was
primarily intended.
>> #ifdef CONFIG_CMA
>> /* CMA stuff */
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index 25c71eb8a7db..138a8df9b813 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -252,12 +252,17 @@ config MIGRATION
>> pages as migration can relocate pages to satisfy a huge page
>> allocation instead of reclaiming.
>>
>> +
>> config ARCH_ENABLE_HUGEPAGE_MIGRATION
>> bool
> Like this. :)
My apologies for that.
>> config ARCH_ENABLE_THP_MIGRATION
>> bool
>>
>> +config CONTIG_ALLOC
>> + def_bool y
>> + depends on (MEMORY_ISOLATION && COMPACTION) || CMA
>> +
>> config PHYS_ADDR_T_64BIT
>> def_bool 64BIT
> Please think carefully through the Kconfig dependencies. 'select' is
> *not* the same as 'depends on'.
>
> This replaces a bunch of arch-specific "select ARCH_HAS_GIGANTIC_PAGE"
> with a 'depends on'. I *think* that ends up being OK, but it absolutely
> needs to be addressed in the changelog about why *you* think it is OK
> and why it doesn't change the functionality of any of the patched
> architectures.
Ok.
>> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
>> index afef61656c1e..e686c92212e9 100644
>> --- a/mm/hugetlb.c
>> +++ b/mm/hugetlb.c
>> @@ -1035,7 +1035,6 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
>> ((node = hstate_next_node_to_free(hs, mask)) || 1); \
>> nr_nodes--)
>>
>> -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
>> static void destroy_compound_gigantic_page(struct page *page,
>> unsigned int order)
>> {
> What's the result of this #ifdef removal? A universally larger kernel
> even for architectures that do not support runtime gigantic page
> alloc/free? That doesn't seem like a good thing.
Ok, I agree: now that we have removed the "wrong" definition of
ARCH_HAS_GIGANTIC_PAGE, we can actually use this define for architectures
to show that they support gigantic pages, and so avoid the problem you
mention. Thanks.
>> @@ -1058,6 +1057,12 @@ static void free_gigantic_page(struct page *page, unsigned int order)
>> free_contig_range(page_to_pfn(page), 1 << order);
>> }
>>
>> +static inline bool gigantic_page_runtime_allocation_supported(void)
>> +{
>> + return IS_ENABLED(CONFIG_CONTIG_ALLOC);
>> +}
> Why bother having this function? Why don't the callers just check the
> config option directly?
Ok, this function is only used once in set_max_huge_pages where you
mention the need for a comment, so I can get rid of it. Thanks.
>> +#ifdef CONFIG_CONTIG_ALLOC
>> static int __alloc_gigantic_page(unsigned long start_pfn,
>> unsigned long nr_pages, gfp_t gfp_mask)
>> {
>> @@ -1143,22 +1148,15 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
>> static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
>> static void prep_compound_gigantic_page(struct page *page, unsigned int order);
>>
>> -#else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
>> -static inline bool gigantic_page_supported(void) { return false; }
>> +#else /* !CONFIG_CONTIG_ALLOC */
>> static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
>> int nid, nodemask_t *nodemask) { return NULL; }
>> -static inline void free_gigantic_page(struct page *page, unsigned int order) { }
>> -static inline void destroy_compound_gigantic_page(struct page *page,
>> - unsigned int order) { }
>> #endif
>>
>> static void update_and_free_page(struct hstate *h, struct page *page)
>> {
>> int i;
>>
>> - if (hstate_is_gigantic(h) && !gigantic_page_supported())
>> - return;
> I don't get the point of removing this check. Logically, this reads as
> checking if the architecture supports gigantic hstates and has nothing
> to do with allocation.
I think this check was wrong from the beginning: gigantic_page_supported()
was only checking (MEMORY_ISOLATION && COMPACTION) || CMA, which has
nothing to do with the capability to free gigantic pages.
But then I went through all the architectures to see if removing this test
could affect any of them, and I noticed that if an architecture supports
gigantic pages without advertising it with ARCH_HAS_GIGANTIC_PAGE, then it
would decrement the number of free huge pages but would not actually free
the pages.
I found at least 2 archs that have gigantic pages, but do not allow
runtime allocation nor freeing of those pages because they do not define
the (wrong) ARCH_HAS_GIGANTIC_PAGE:
- ia64 has HPAGE_SHIFT_DEFAULT = 28, with PAGE_SHIFT = 14
- sh has max HPAGE_SHIFT = 29 and max PAGE_SHIFT = 16
With the default MAX_ORDER = 11, both architectures support gigantic pages.
So I'm going to propose a patch that selects the (right)
ARCH_HAS_GIGANTIC_PAGE for those archs, because I think they should be able
to free their boot-time gigantic pages.
Regarding this check, we can either remove it, if we are sure that every
architecture that has gigantic pages selects ARCH_HAS_GIGANTIC_PAGE, or
leave it in case some future arch forgets to select it.
I'd rather patch all archs so that they can at least free gigantic pages,
and then remove the test, since hstate_is_gigantic would then imply
gigantic_page_supported.
I will propose something like that if you agree.
>> h->nr_huge_pages--;
>> h->nr_huge_pages_node[page_to_nid(page)]--;
>> for (i = 0; i < pages_per_huge_page(h); i++) {
>> @@ -2276,13 +2274,20 @@ static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
>> }
>>
>> #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
>> -static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
>> +static int set_max_huge_pages(struct hstate *h, unsigned long count,
>> nodemask_t *nodes_allowed)
>> {
>> unsigned long min_count, ret;
>>
>> - if (hstate_is_gigantic(h) && !gigantic_page_supported())
>> - return h->max_huge_pages;
>> + if (hstate_is_gigantic(h) &&
>> + !gigantic_page_runtime_allocation_supported()) {
> The indentation here is wrong and reduces readability. Needs to be like
> this:
>
> if (hstate_is_gigantic(h) &&
> !gigantic_page_runtime_allocation_supported()) {
This will disappear with your previous remark, thanks.
>> + spin_lock(&hugetlb_lock);
>> + if (count > persistent_huge_pages(h)) {
>> + spin_unlock(&hugetlb_lock);
>> + return -EINVAL;
>> + }
>> + goto decrease_pool;
>> + }
> Needs comments.
>
> /* Gigantic pages can be freed but not allocated */
>
> or something.
>
Ok, I agree, I'll add that and another sentence regarding the removal
of gigantic_page_runtime_allocation_supported.
Thank you Dave for your comments !
Alex