Currently the proactive compaction order is fixed to
COMPACTION_HPAGE_ORDER(9), it's OK in most machines with lots of
normal 4KB memory, but it's too high for the machines with small
normal memory, for example the machines with most memory configured
as 1GB hugetlbfs huge pages. In these machines the max order of
free pages is often below 9, and it's always below 9 even with hard
compaction. This will lead to proactive compaction being triggered very
frequently. In these machines we only care about order of 3 or 4.
This patch exports the order to proc and lets the user configure
it; the default value is still COMPACTION_HPAGE_ORDER.
Signed-off-by: chukaiping <[email protected]>
---
include/linux/compaction.h | 1 +
kernel/sysctl.c | 10 ++++++++++
mm/compaction.c | 7 ++++---
3 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index ed4070e..151ccd1 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -83,6 +83,7 @@ static inline unsigned long compact_gap(unsigned int order)
#ifdef CONFIG_COMPACTION
extern int sysctl_compact_memory;
extern unsigned int sysctl_compaction_proactiveness;
+extern unsigned int sysctl_compaction_order;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos);
extern int sysctl_extfrag_threshold;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62fbd09..277df31 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -114,6 +114,7 @@
static int __maybe_unused neg_one = -1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
+static int __maybe_unused ten = 10;
static unsigned long zero_ul;
static unsigned long one_ul = 1;
static unsigned long long_max = LONG_MAX;
@@ -2871,6 +2872,15 @@ int proc_do_static_key(struct ctl_table *table, int write,
.extra2 = &one_hundred,
},
{
+ .procname = "compaction_order",
+ .data = &sysctl_compaction_order,
+ .maxlen = sizeof(sysctl_compaction_order),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &ten,
+ },
+ {
.procname = "extfrag_threshold",
.data = &sysctl_extfrag_threshold,
.maxlen = sizeof(int),
diff --git a/mm/compaction.c b/mm/compaction.c
index e04f447..a192996 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1925,16 +1925,16 @@ static bool kswapd_is_running(pg_data_t *pgdat)
/*
* A zone's fragmentation score is the external fragmentation wrt to the
- * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
+ * sysctl_compaction_order. It returns a value in the range [0, 100].
*/
static unsigned int fragmentation_score_zone(struct zone *zone)
{
- return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
+ return extfrag_for_order(zone, sysctl_compaction_order);
}
/*
* A weighted zone's fragmentation score is the external fragmentation
- * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It
+ * wrt to the sysctl_compaction_order scaled by the zone's size. It
* returns a value in the range [0, 100].
*
* The scaling factor ensures that proactive compaction focuses on larger
@@ -2666,6 +2666,7 @@ static void compact_nodes(void)
* background. It takes values in the range [0, 100].
*/
unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
+unsigned int __read_mostly sysctl_compaction_order = COMPACTION_HPAGE_ORDER;
/*
* This is the entry point for compacting all nodes via
--
1.7.1
Hello.
On Mon, Apr 12, 2021 at 05:05:30PM +0800, chukaiping wrote:
> Currently the proactive compaction order is fixed to
> COMPACTION_HPAGE_ORDER(9), it's OK in most machines with lots of
> normal 4KB memory, but it's too high for the machines with small
> normal memory, for example the machines with most memory configured
> as 1GB hugetlbfs huge pages. In these machines the max order of
> free pages is often below 9, and it's always below 9 even with hard
> compaction. This will lead to proactive compaction be triggered very
> frequently. In these machines we only care about order of 3 or 4.
> This patch export the oder to proc and let it configurable
> by user, and the default value is still COMPACTION_HPAGE_ORDER.
>
> Signed-off-by: chukaiping <[email protected]>
> ---
> include/linux/compaction.h | 1 +
> kernel/sysctl.c | 10 ++++++++++
> mm/compaction.c | 7 ++++---
> 3 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index ed4070e..151ccd1 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -83,6 +83,7 @@ static inline unsigned long compact_gap(unsigned int order)
> #ifdef CONFIG_COMPACTION
> extern int sysctl_compact_memory;
> extern unsigned int sysctl_compaction_proactiveness;
> +extern unsigned int sysctl_compaction_order;
> extern int sysctl_compaction_handler(struct ctl_table *table, int write,
> void *buffer, size_t *length, loff_t *ppos);
> extern int sysctl_extfrag_threshold;
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 62fbd09..277df31 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -114,6 +114,7 @@
> static int __maybe_unused neg_one = -1;
> static int __maybe_unused two = 2;
> static int __maybe_unused four = 4;
> +static int __maybe_unused ten = 10;
^^ does the upper limit have to be hard-coded like this?
> static unsigned long zero_ul;
> static unsigned long one_ul = 1;
> static unsigned long long_max = LONG_MAX;
> @@ -2871,6 +2872,15 @@ int proc_do_static_key(struct ctl_table *table, int write,
> .extra2 = &one_hundred,
> },
> {
> + .procname = "compaction_order",
> + .data = &sysctl_compaction_order,
> + .maxlen = sizeof(sysctl_compaction_order),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = SYSCTL_ZERO,
I wonder what happens if this knob is set to 0. Have you tested such a
corner case?
> + .extra2 = &ten,
> + },
> + {
> .procname = "extfrag_threshold",
> .data = &sysctl_extfrag_threshold,
> .maxlen = sizeof(int),
> diff --git a/mm/compaction.c b/mm/compaction.c
> index e04f447..a192996 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1925,16 +1925,16 @@ static bool kswapd_is_running(pg_data_t *pgdat)
>
> /*
> * A zone's fragmentation score is the external fragmentation wrt to the
> - * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
> + * sysctl_compaction_order. It returns a value in the range [0, 100].
> */
> static unsigned int fragmentation_score_zone(struct zone *zone)
> {
> - return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
> + return extfrag_for_order(zone, sysctl_compaction_order);
> }
>
> /*
> * A weighted zone's fragmentation score is the external fragmentation
> - * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It
> + * wrt to the sysctl_compaction_order scaled by the zone's size. It
> * returns a value in the range [0, 100].
> *
> * The scaling factor ensures that proactive compaction focuses on larger
> @@ -2666,6 +2666,7 @@ static void compact_nodes(void)
> * background. It takes values in the range [0, 100].
> */
> unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
> +unsigned int __read_mostly sysctl_compaction_order = COMPACTION_HPAGE_ORDER;
>
> /*
> * This is the entry point for compacting all nodes via
> --
> 1.7.1
>
--
Oleksandr Natalenko (post-factum)
Hi chukaiping,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on kees/for-next/pstore]
[also build test ERROR on linus/master v5.12-rc7 next-20210412]
[cannot apply to hnaz-linux-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/chukaiping/mm-compaction-let-proactive-compaction-order-configurable/20210412-172336
base: https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/pstore
config: powerpc-cell_defconfig (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/a203321bf356e9514ca678c96119df72d6bfa803
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review chukaiping/mm-compaction-let-proactive-compaction-order-configurable/20210412-172336
git checkout a203321bf356e9514ca678c96119df72d6bfa803
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All errors (new ones prefixed by >>):
In file included from arch/powerpc/include/asm/mmu.h:149,
from arch/powerpc/include/asm/lppaca.h:46,
from arch/powerpc/include/asm/paca.h:17,
from arch/powerpc/include/asm/current.h:13,
from include/linux/sched.h:12,
from include/linux/ratelimit.h:6,
from include/linux/dev_printk.h:16,
from include/linux/device.h:15,
from include/linux/node.h:18,
from include/linux/cpu.h:17,
from mm/compaction.c:11:
>> arch/powerpc/include/asm/page.h:39:28: error: initializer element is not constant
39 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
| ^
mm/compaction.c:66:32: note: in expansion of macro 'HUGETLB_PAGE_ORDER'
66 | #define COMPACTION_HPAGE_ORDER HUGETLB_PAGE_ORDER
| ^~~~~~~~~~~~~~~~~~
mm/compaction.c:2669:54: note: in expansion of macro 'COMPACTION_HPAGE_ORDER'
2669 | unsigned int __read_mostly sysctl_compaction_order = COMPACTION_HPAGE_ORDER;
| ^~~~~~~~~~~~~~~~~~~~~~
vim +39 arch/powerpc/include/asm/page.h
5cd16ee934eafc include/asm-powerpc/page.h Michael Ellerman 2005-11-11 25
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 26 #ifndef __ASSEMBLY__
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 27 #ifndef CONFIG_HUGETLB_PAGE
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 28 #define HPAGE_SHIFT PAGE_SHIFT
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 29 #elif defined(CONFIG_PPC_BOOK3S_64)
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 30 extern unsigned int hpage_shift;
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 31 #define HPAGE_SHIFT hpage_shift
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 32 #elif defined(CONFIG_PPC_8xx)
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 33 #define HPAGE_SHIFT 19 /* 512k pages */
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 34 #elif defined(CONFIG_PPC_FSL_BOOK3E)
c5710cd2073503 arch/powerpc/include/asm/page.h Christophe Leroy 2019-04-26 35 #define HPAGE_SHIFT 22 /* 4M pages */
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 36 #endif
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 37 #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 38 #define HPAGE_MASK (~(HPAGE_SIZE - 1))
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 @39 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 40 #define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 41 #endif
41151e77a4d96e arch/powerpc/include/asm/page.h Becky Bruce 2011-06-28 42
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi chukaiping,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on kees/for-next/pstore]
[also build test ERROR on linus/master v5.12-rc7 next-20210412]
[cannot apply to hnaz-linux-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/chukaiping/mm-compaction-let-proactive-compaction-order-configurable/20210412-172336
base: https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/pstore
config: ia64-allmodconfig (attached as .config)
compiler: ia64-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/a203321bf356e9514ca678c96119df72d6bfa803
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review chukaiping/mm-compaction-let-proactive-compaction-order-configurable/20210412-172336
git checkout a203321bf356e9514ca678c96119df72d6bfa803
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=ia64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All errors (new ones prefixed by >>):
In file included from arch/ia64/include/asm/ptrace.h:46,
from arch/ia64/include/asm/processor.h:20,
from arch/ia64/include/asm/thread_info.h:12,
from include/linux/thread_info.h:58,
from include/asm-generic/preempt.h:5,
from ./arch/ia64/include/generated/asm/preempt.h:1,
from include/linux/preempt.h:78,
from include/linux/rcupdate.h:27,
from include/linux/rculist.h:11,
from include/linux/pid.h:5,
from include/linux/sched.h:14,
from include/linux/ratelimit.h:6,
from include/linux/dev_printk.h:16,
from include/linux/device.h:15,
from include/linux/node.h:18,
from include/linux/cpu.h:17,
from mm/compaction.c:11:
>> arch/ia64/include/asm/page.h:153:29: error: initializer element is not constant
153 | # define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
| ^
mm/compaction.c:66:32: note: in expansion of macro 'HUGETLB_PAGE_ORDER'
66 | #define COMPACTION_HPAGE_ORDER HUGETLB_PAGE_ORDER
| ^~~~~~~~~~~~~~~~~~
mm/compaction.c:2669:54: note: in expansion of macro 'COMPACTION_HPAGE_ORDER'
2669 | unsigned int __read_mostly sysctl_compaction_order = COMPACTION_HPAGE_ORDER;
| ^~~~~~~~~~~~~~~~~~~~~~
vim +153 arch/ia64/include/asm/page.h
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 149
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 150 #ifdef CONFIG_HUGETLB_PAGE
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 151 # define htlbpage_to_page(x) (((unsigned long) REGION_NUMBER(x) << 61) \
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 152 | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT)))
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 @153 # define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 154 extern unsigned int hpage_shift;
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 155 #endif
^1da177e4c3f41 include/asm-ia64/page.h Linus Torvalds 2005-04-16 156
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On Mon, 12 Apr 2021, chukaiping wrote:
> Currently the proactive compaction order is fixed to
> COMPACTION_HPAGE_ORDER(9), it's OK in most machines with lots of
> normal 4KB memory, but it's too high for the machines with small
> normal memory, for example the machines with most memory configured
> as 1GB hugetlbfs huge pages. In these machines the max order of
> free pages is often below 9, and it's always below 9 even with hard
> compaction. This will lead to proactive compaction be triggered very
> frequently. In these machines we only care about order of 3 or 4.
> This patch export the oder to proc and let it configurable
> by user, and the default value is still COMPACTION_HPAGE_ORDER.
>
I'm curious why you have proactive compaction enabled at all in this case?
The order-9 threshold is likely to optimize for hugepage availability, but
in your setup it appears that's not a goal.
So what benefit does proactive compaction provide if only done for order-3
or order-4?
> Signed-off-by: chukaiping <[email protected]>
> ---
> include/linux/compaction.h | 1 +
> kernel/sysctl.c | 10 ++++++++++
> mm/compaction.c | 7 ++++---
> 3 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index ed4070e..151ccd1 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -83,6 +83,7 @@ static inline unsigned long compact_gap(unsigned int order)
> #ifdef CONFIG_COMPACTION
> extern int sysctl_compact_memory;
> extern unsigned int sysctl_compaction_proactiveness;
> +extern unsigned int sysctl_compaction_order;
> extern int sysctl_compaction_handler(struct ctl_table *table, int write,
> void *buffer, size_t *length, loff_t *ppos);
> extern int sysctl_extfrag_threshold;
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 62fbd09..277df31 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -114,6 +114,7 @@
> static int __maybe_unused neg_one = -1;
> static int __maybe_unused two = 2;
> static int __maybe_unused four = 4;
> +static int __maybe_unused ten = 10;
> static unsigned long zero_ul;
> static unsigned long one_ul = 1;
> static unsigned long long_max = LONG_MAX;
> @@ -2871,6 +2872,15 @@ int proc_do_static_key(struct ctl_table *table, int write,
> .extra2 = &one_hundred,
> },
> {
> + .procname = "compaction_order",
> + .data = &sysctl_compaction_order,
> + .maxlen = sizeof(sysctl_compaction_order),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = SYSCTL_ZERO,
> + .extra2 = &ten,
> + },
> + {
> .procname = "extfrag_threshold",
> .data = &sysctl_extfrag_threshold,
> .maxlen = sizeof(int),
> diff --git a/mm/compaction.c b/mm/compaction.c
> index e04f447..a192996 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1925,16 +1925,16 @@ static bool kswapd_is_running(pg_data_t *pgdat)
>
> /*
> * A zone's fragmentation score is the external fragmentation wrt to the
> - * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
> + * sysctl_compaction_order. It returns a value in the range [0, 100].
> */
> static unsigned int fragmentation_score_zone(struct zone *zone)
> {
> - return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
> + return extfrag_for_order(zone, sysctl_compaction_order);
> }
>
> /*
> * A weighted zone's fragmentation score is the external fragmentation
> - * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It
> + * wrt to the sysctl_compaction_order scaled by the zone's size. It
> * returns a value in the range [0, 100].
> *
> * The scaling factor ensures that proactive compaction focuses on larger
> @@ -2666,6 +2666,7 @@ static void compact_nodes(void)
> * background. It takes values in the range [0, 100].
> */
> unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
> +unsigned int __read_mostly sysctl_compaction_order = COMPACTION_HPAGE_ORDER;
>
> /*
> * This is the entry point for compacting all nodes via
> --
> 1.7.1
>
>
Hi Oleksandr,
Please see my answer inline.
BR,
Chu Kaiping
-----Original Message-----
From: Oleksandr Natalenko <[email protected]>
Sent: April 13, 2021 0:58
To: Chu,Kaiping <[email protected]>
Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Subject: Re: [PATCH] mm/compaction:let proactive compaction order configurable
Hello.
On Mon, Apr 12, 2021 at 05:05:30PM +0800, chukaiping wrote:
> Currently the proactive compaction order is fixed to
> COMPACTION_HPAGE_ORDER(9), it's OK in most machines with lots of
> normal 4KB memory, but it's too high for the machines with small
> normal memory, for example the machines with most memory configured as
> 1GB hugetlbfs huge pages. In these machines the max order of free
> pages is often below 9, and it's always below 9 even with hard
> compaction. This will lead to proactive compaction be triggered very
> frequently. In these machines we only care about order of 3 or 4.
> This patch export the oder to proc and let it configurable by user,
> and the default value is still COMPACTION_HPAGE_ORDER.
>
> Signed-off-by: chukaiping <[email protected]>
> ---
> include/linux/compaction.h | 1 +
> kernel/sysctl.c | 10 ++++++++++
> mm/compaction.c | 7 ++++---
> 3 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index ed4070e..151ccd1 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -83,6 +83,7 @@ static inline unsigned long compact_gap(unsigned int
> order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory;
> extern unsigned int sysctl_compaction_proactiveness;
> +extern unsigned int sysctl_compaction_order;
> extern int sysctl_compaction_handler(struct ctl_table *table, int write,
> void *buffer, size_t *length, loff_t *ppos); extern int
> sysctl_extfrag_threshold; diff --git a/kernel/sysctl.c
> b/kernel/sysctl.c index 62fbd09..277df31 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -114,6 +114,7 @@
> static int __maybe_unused neg_one = -1; static int __maybe_unused
> two = 2; static int __maybe_unused four = 4;
> +static int __maybe_unused ten = 10;
^^ does the upper limit have to be hard-coded like this?
--> the max order of the buddy allocator is defined by MAX_ORDER; I will change it to MAX_ORDER in the next patch.
> static unsigned long zero_ul;
> static unsigned long one_ul = 1;
> static unsigned long long_max = LONG_MAX;
> @@ -2871,6 +2872,15 @@ int proc_do_static_key(struct ctl_table *table, int write,
> .extra2 = &one_hundred,
> },
> {
> + .procname = "compaction_order",
> + .data = &sysctl_compaction_order,
> + .maxlen = sizeof(sysctl_compaction_order),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = SYSCTL_ZERO,
I wonder what happens if this knob is set to 0. Have you tested such a
corner case?
--> in theory, 0 is also a configurable value, but the fragmentation index of order 0 is always 0, so it won't do any proactive compaction. I have tested this corner case: if the order is set to 0, there is no error, but proactive compaction won't happen.
> + .extra2 = &ten,
> + },
> + {
> .procname = "extfrag_threshold",
> .data = &sysctl_extfrag_threshold,
> .maxlen = sizeof(int),
> diff --git a/mm/compaction.c b/mm/compaction.c
> index e04f447..a192996 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1925,16 +1925,16 @@ static bool kswapd_is_running(pg_data_t *pgdat)
>
> /*
> * A zone's fragmentation score is the external fragmentation wrt to the
> - * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
> + * sysctl_compaction_order. It returns a value in the range [0, 100].
> */
> static unsigned int fragmentation_score_zone(struct zone *zone)
> {
> - return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
> + return extfrag_for_order(zone, sysctl_compaction_order);
> }
>
> /*
> * A weighted zone's fragmentation score is the external fragmentation
> - * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It
> + * wrt to the sysctl_compaction_order scaled by the zone's size. It
> * returns a value in the range [0, 100].
> *
> * The scaling factor ensures that proactive compaction focuses on larger
> @@ -2666,6 +2666,7 @@ static void compact_nodes(void)
> * background. It takes values in the range [0, 100].
> */
> unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
> +unsigned int __read_mostly sysctl_compaction_order = COMPACTION_HPAGE_ORDER;
>
> /*
> * This is the entry point for compacting all nodes via
> --
> 1.7.1
>
--
Oleksandr Natalenko (post-factum)
Hi Rientjes,
In our case we don't care about the allocation delay of transparent huge pages, but proactive compaction is really useful to us. Without proactive compaction, the kernel currently does memory compaction only when an allocation of high-order memory fails, which is too late. When the machine is under heavy load, many processes may trigger compaction at the same time; this leads to serious lock contention and makes the machine very slow.
Doing proactive compaction from time to time keeps the fragmentation index at a low level and reduces the soft-lockup rate.
The order of 3 or 4 is only an empirical value; we may change it according to machine load.
BR,
Chu Kaiping
-----Original Message-----
From: David Rientjes <[email protected]>
Sent: April 13, 2021 2:26
To: Chu,Kaiping <[email protected]>
Cc: [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]; [email protected]
Subject: Re: [PATCH] mm/compaction:let proactive compaction order configurable
On Mon, 12 Apr 2021, chukaiping wrote:
> Currently the proactive compaction order is fixed to
> COMPACTION_HPAGE_ORDER(9), it's OK in most machines with lots of
> normal 4KB memory, but it's too high for the machines with small
> normal memory, for example the machines with most memory configured as
> 1GB hugetlbfs huge pages. In these machines the max order of free
> pages is often below 9, and it's always below 9 even with hard
> compaction. This will lead to proactive compaction be triggered very
> frequently. In these machines we only care about order of 3 or 4.
> This patch export the oder to proc and let it configurable by user,
> and the default value is still COMPACTION_HPAGE_ORDER.
>
I'm curious why you have proactive compaction enabled at all in this case?
The order-9 threshold is likely to optimize for hugepage availability, but in your setup it appears that's not a goal.
So what benefit does proactive compaction provide if only done for order-3 or order-4?
> Signed-off-by: chukaiping <[email protected]>
> ---
> include/linux/compaction.h | 1 +
> kernel/sysctl.c | 10 ++++++++++
> mm/compaction.c | 7 ++++---
> 3 files changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/compaction.h b/include/linux/compaction.h
> index ed4070e..151ccd1 100644
> --- a/include/linux/compaction.h
> +++ b/include/linux/compaction.h
> @@ -83,6 +83,7 @@ static inline unsigned long compact_gap(unsigned int
> order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory;
> extern unsigned int sysctl_compaction_proactiveness;
> +extern unsigned int sysctl_compaction_order;
> extern int sysctl_compaction_handler(struct ctl_table *table, int write,
> void *buffer, size_t *length, loff_t *ppos); extern int
> sysctl_extfrag_threshold; diff --git a/kernel/sysctl.c
> b/kernel/sysctl.c index 62fbd09..277df31 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -114,6 +114,7 @@
> static int __maybe_unused neg_one = -1; static int __maybe_unused
> two = 2; static int __maybe_unused four = 4;
> +static int __maybe_unused ten = 10;
> static unsigned long zero_ul;
> static unsigned long one_ul = 1;
> static unsigned long long_max = LONG_MAX; @@ -2871,6 +2872,15 @@ int
> proc_do_static_key(struct ctl_table *table, int write,
> .extra2 = &one_hundred,
> },
> {
> + .procname = "compaction_order",
> + .data = &sysctl_compaction_order,
> + .maxlen = sizeof(sysctl_compaction_order),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = SYSCTL_ZERO,
> + .extra2 = &ten,
> + },
> + {
> .procname = "extfrag_threshold",
> .data = &sysctl_extfrag_threshold,
> .maxlen = sizeof(int),
> diff --git a/mm/compaction.c b/mm/compaction.c index e04f447..a192996
> 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -1925,16 +1925,16 @@ static bool kswapd_is_running(pg_data_t
> *pgdat)
>
> /*
> * A zone's fragmentation score is the external fragmentation wrt to
> the
> - * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
> + * sysctl_compaction_order. It returns a value in the range [0, 100].
> */
> static unsigned int fragmentation_score_zone(struct zone *zone) {
> - return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
> + return extfrag_for_order(zone, sysctl_compaction_order);
> }
>
> /*
> * A weighted zone's fragmentation score is the external
> fragmentation
> - * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It
> + * wrt to the sysctl_compaction_order scaled by the zone's size. It
> * returns a value in the range [0, 100].
> *
> * The scaling factor ensures that proactive compaction focuses on
> larger @@ -2666,6 +2666,7 @@ static void compact_nodes(void)
> * background. It takes values in the range [0, 100].
> */
> unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
> +unsigned int __read_mostly sysctl_compaction_order =
> +COMPACTION_HPAGE_ORDER;
>
> /*
> * This is the entry point for compacting all nodes via
> --
> 1.7.1
>
>