In the fallbacks table of page_alloc.c, MIGRATE_CMA is a fallback of
MIGRATE_MOVABLE: MIGRATE_MOVABLE allocations will use MIGRATE_CMA when
no page of the order that the kernel wants is available.
On a system that runs a lot of user-space programs, for instance an
Android board, most memory ends up allocated as MIGRATE_MOVABLE. Before
__rmqueue_fallback() gets memory from MIGRATE_CMA, the oom_killer will
kill a task to release memory when the kernel wants MIGRATE_UNMOVABLE
memory, because the fallbacks of MIGRATE_UNMOVABLE are only
MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
This situation is odd: MIGRATE_CMA still has a lot of free memory, yet
the kernel kills tasks to release memory.
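For reference, the fallback table in mm/page_alloc.c of this kernel
version looks roughly like the abridged sketch below (exact entries can
differ between versions); note that MIGRATE_UNMOVABLE has no
MIGRATE_CMA entry:

static int fallbacks[MIGRATE_TYPES][4] = {
	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE },
#ifdef CONFIG_CMA
	[MIGRATE_MOVABLE]     = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
#else
	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
#endif
	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
};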
This patch series adds a new feature, CMA_AGGRESSIVE, to make the
allocator use CMA memory more aggressively.
If CMA_AGGRESSIVE is enabled, when __rmqueue() tries to get pages for
MIGRATE_MOVABLE and the conditions allow it, the allocation is satisfied
from MIGRATE_CMA first. If MIGRATE_CMA doesn't have enough pages for the
allocation, it falls back to MIGRATE_MOVABLE.
The memory of MIGRATE_MOVABLE can then be kept for MIGRATE_UNMOVABLE and
MIGRATE_RECLAIMABLE, which cannot fall back to MIGRATE_CMA.
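A simplified sketch of the resulting __rmqueue() flow (the real change
is in the page_alloc.c patch below; the names follow that patch):

	if (cma_aggressive_switch
	    && migratetype == MIGRATE_MOVABLE
	    && atomic_read(&cma_alloc_counter) == 0
	    && global_page_state(NR_FREE_CMA_PAGES)
			> cma_aggressive_free_min + (1 << order))
		/* Enough free CMA pages and no CMA allocation in flight:
		   serve this movable request from MIGRATE_CMA first. */
		migratetype = MIGRATE_CMA;

	page = __rmqueue_smallest(zone, order, migratetype);
	if (!page && migratetype == MIGRATE_CMA) {
		/* CMA could not satisfy it: retry as plain MIGRATE_MOVABLE. */
		migratetype = MIGRATE_MOVABLE;
		page = __rmqueue_smallest(zone, order, migratetype);
	}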
Function shrink_all_memory() tries to free `nr_to_reclaim' pages of memory.
The CMA_AGGRESSIVE_SHRINK code will call this function to free
`nr_to_reclaim' pages as well, but it needs a different scan_control than
the current caller, hibernate_preallocate_memory().
If the new `hibernation' argument is true, the caller is
hibernate_preallocate_memory(); if not, the caller is the CMA allocation path.
Signed-off-by: Hui Zhu <[email protected]>
---
include/linux/swap.h | 3 ++-
kernel/power/snapshot.c | 2 +-
mm/vmscan.c | 19 +++++++++++++------
3 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 37a585b..9f2cb43 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -335,7 +335,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
struct zone *zone,
unsigned long *nr_scanned);
-extern unsigned long shrink_all_memory(unsigned long nr_pages);
+extern unsigned long shrink_all_memory(unsigned long nr_pages,
+ bool hibernation);
extern int vm_swappiness;
extern int remove_mapping(struct address_space *mapping, struct page *page);
extern unsigned long vm_total_pages;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 791a618..a00fc35 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1657,7 +1657,7 @@ int hibernate_preallocate_memory(void)
* NOTE: If this is not done, performance will be hurt badly in some
* test cases.
*/
- shrink_all_memory(saveable - size);
+ shrink_all_memory(saveable - size, true);
/*
* The number of saveable pages in memory was too high, so apply some
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dcb4707..fdcfa30 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3404,7 +3404,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
wake_up_interruptible(&pgdat->kswapd_wait);
}
-#ifdef CONFIG_HIBERNATION
+#if defined CONFIG_HIBERNATION || defined CONFIG_CMA_AGGRESSIVE
/*
* Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
* freed pages.
@@ -3413,22 +3413,29 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
* LRU order by reclaiming preferentially
* inactive > active > active referenced > active mapped
*/
-unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
+unsigned long shrink_all_memory(unsigned long nr_to_reclaim, bool hibernation)
{
struct reclaim_state reclaim_state;
struct scan_control sc = {
.nr_to_reclaim = nr_to_reclaim,
- .gfp_mask = GFP_HIGHUSER_MOVABLE,
.priority = DEF_PRIORITY,
- .may_writepage = 1,
.may_unmap = 1,
.may_swap = 1,
- .hibernation_mode = 1,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
struct task_struct *p = current;
unsigned long nr_reclaimed;
+ if (hibernation) {
+ sc.hibernation_mode = 1;
+ sc.may_writepage = 1;
+ sc.gfp_mask = GFP_HIGHUSER_MOVABLE;
+ } else {
+ sc.hibernation_mode = 0;
+ sc.may_writepage = !laptop_mode;
+ sc.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_HIGHMEM;
+ }
+
p->flags |= PF_MEMALLOC;
lockdep_set_current_reclaim_state(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
@@ -3442,7 +3449,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
return nr_reclaimed;
}
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_HIBERNATION || CONFIG_CMA_AGGRESSIVE */
/* It's optimal to keep kswapds on the same CPUs as their memory, but
not required for correctness. So if the last cpu in a node goes
--
1.9.1
Add a CMA_AGGRESSIVE config option, depending on CMA, to the kernel
configuration.
Add CMA_AGGRESSIVE_PHY_MAX, CMA_AGGRESSIVE_FREE_MIN and
CMA_AGGRESSIVE_SHRINK, which depend on CMA_AGGRESSIVE.
If the physical memory size (not including CMA memory) in bytes is less
than or equal to CMA_AGGRESSIVE_PHY_MAX, the CMA aggressive switch
(sysctl "vm.cma-aggressive-switch") will be turned on at boot.
CMA_AGGRESSIVE_FREE_MIN is used at boot to initialize sysctl
"vm.cma-aggressive-free-min".
If CMA_AGGRESSIVE_SHRINK is enabled, sysctl
"vm.cma-aggressive-shrink-switch" will be set to true at boot as well.
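As an illustration only, a small board that wants this behaviour could
end up with a config fragment like the following (the values shown are
the defaults from this patch, not recommendations):

CONFIG_CMA=y
CONFIG_CMA_AGGRESSIVE=y
CONFIG_CMA_AGGRESSIVE_PHY_MAX=0x40000000
CONFIG_CMA_AGGRESSIVE_FREE_MIN=500
CONFIG_CMA_AGGRESSIVE_SHRINK=y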
Signed-off-by: Hui Zhu <[email protected]>
---
mm/Kconfig | 43 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
diff --git a/mm/Kconfig b/mm/Kconfig
index 1d1ae6b..940f5f3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -527,6 +527,49 @@ config CMA_AREAS
If unsure, leave the default value "7".
+config CMA_AGGRESSIVE
+ bool "CMA aggressive"
+ depends on CMA
+ default n
+ help
+ Be more aggressive about taking memory from CMA when allocate MOVABLE
+ page.
+ Sysctl "vm.cma-aggressive-switch", "vm.cma-aggressive-alloc-max"
+ and "vm.cma-aggressive-shrink-switch" can control this function.
+ If unsure, say "n".
+
+config CMA_AGGRESSIVE_PHY_MAX
+ hex "Physical memory size in Bytes that auto turn on the CMA aggressive switch"
+ depends on CMA_AGGRESSIVE
+ default 0x40000000
+ help
+ If physical memory size (not include CMA memory) in byte less than or
+ equal to this value, CMA aggressive switch will be opened.
+ After the Linux boot, sysctl "vm.cma-aggressive-switch" can control
+ the CMA AGGRESSIVE switch.
+
+config CMA_AGGRESSIVE_FREE_MIN
+ int "The minimum free CMA page number that CMA aggressive work"
+ depends on CMA_AGGRESSIVE
+ default 500
+ help
+ When system boot, this value will set to sysctl
+ "vm.cma-aggressive-free-min".
+ If the number of CMA free pages is small than this sysctl value,
+ CMA aggressive will not work.
+
+config CMA_AGGRESSIVE_SHRINK
+ bool "CMA aggressive shrink"
+ depends on CMA_AGGRESSIVE
+ default y
+ help
+ If this value is true, sysctl "vm.cma-aggressive-shrink-switch" will
+ be set to true when Linux boot.
+ If sysctl "vm.cma-aggressive-shrink-switch" is true and free normal
+ memory's size is smaller than the size that it want to allocate,
+ do memory shrink before driver allocate pages from CMA.
+ If unsure, say "y".
+
config MEM_SOFT_DIRTY
bool "Track memory changes"
depends on CHECKPOINT_RESTORE && HAVE_ARCH_SOFT_DIRTY && PROC_FS
--
1.9.1
If the page allocator function __rmqueue() tries to get pages for
MIGRATE_MOVABLE and the conditions (cma_aggressive_switch,
cma_aggressive_free_min, cma_alloc_counter) allow it, the allocation is
satisfied from MIGRATE_CMA first.
Signed-off-by: Hui Zhu <[email protected]>
---
mm/page_alloc.c | 42 +++++++++++++++++++++++++++++++-----------
1 file changed, 31 insertions(+), 11 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 736d8e1..87bc326 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -65,6 +65,10 @@
#include <asm/div64.h>
#include "internal.h"
+#ifdef CONFIG_CMA_AGGRESSIVE
+#include <linux/cma.h>
+#endif
+
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
#define MIN_PERCPU_PAGELIST_FRACTION (8)
@@ -1189,20 +1193,36 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
{
struct page *page;
-retry_reserve:
+#ifdef CONFIG_CMA_AGGRESSIVE
+ if (cma_aggressive_switch
+ && migratetype == MIGRATE_MOVABLE
+ && atomic_read(&cma_alloc_counter) == 0
+ && global_page_state(NR_FREE_CMA_PAGES) > cma_aggressive_free_min
+ + (1 << order))
+ migratetype = MIGRATE_CMA;
+#endif
+retry:
page = __rmqueue_smallest(zone, order, migratetype);
- if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
- page = __rmqueue_fallback(zone, order, migratetype);
+ if (unlikely(!page)) {
+#ifdef CONFIG_CMA_AGGRESSIVE
+ if (migratetype == MIGRATE_CMA) {
+ migratetype = MIGRATE_MOVABLE;
+ goto retry;
+ }
+#endif
+ if (migratetype != MIGRATE_RESERVE) {
+ page = __rmqueue_fallback(zone, order, migratetype);
- /*
- * Use MIGRATE_RESERVE rather than fail an allocation. goto
- * is used because __rmqueue_smallest is an inline function
- * and we want just one call site
- */
- if (!page) {
- migratetype = MIGRATE_RESERVE;
- goto retry_reserve;
+ /*
+ * Use MIGRATE_RESERVE rather than fail an allocation.
+ * goto is used because __rmqueue_smallest is an inline
+ * function and we want just one call site
+ */
+ if (!page) {
+ migratetype = MIGRATE_RESERVE;
+ goto retry;
+ }
}
}
--
1.9.1
Add cma_alloc_counter, cma_aggressive_switch, cma_aggressive_free_min and
cma_aggressive_shrink_switch.
cma_aggressive_switch is the switch for the whole CMA_AGGRESSIVE feature. It
can be controlled by sysctl "vm.cma-aggressive-switch".
cma_aggressive_free_min can be controlled by sysctl
"vm.cma-aggressive-free-min". If the number of free CMA pages is smaller than
this value, CMA_AGGRESSIVE will not take effect in the page allocator.
cma_aggressive_shrink_switch can be controlled by sysctl
"vm.cma-aggressive-shrink-switch". If it is true and the amount of free normal
memory is smaller than the requested allocation size, memory is shrunk with
shrink_all_memory() before a driver allocates pages from CMA.
When the kernel starts to reserve a custom contiguous area, cma_alloc_counter
is incremented and CMA_AGGRESSIVE stops taking effect in the page allocator.
When the reservation function returns, cma_alloc_counter is decremented.
Signed-off-by: Hui Zhu <[email protected]>
---
include/linux/cma.h | 7 +++++++
kernel/sysctl.c | 27 +++++++++++++++++++++++++++
mm/cma.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 88 insertions(+)
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 0430ed0..df96abf 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -15,6 +15,13 @@
struct cma;
+#ifdef CONFIG_CMA_AGGRESSIVE
+extern atomic_t cma_alloc_counter;
+extern int cma_aggressive_switch;
+extern unsigned long cma_aggressive_free_min;
+extern int cma_aggressive_shrink_switch;
+#endif
+
extern phys_addr_t cma_get_base(struct cma *cma);
extern unsigned long cma_get_size(struct cma *cma);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4aada6d..646929e2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -92,6 +92,10 @@
#include <linux/nmi.h>
#endif
+#ifdef CONFIG_CMA_AGGRESSIVE
+#include <linux/cma.h>
+#endif
+
#if defined(CONFIG_SYSCTL)
@@ -1485,6 +1489,29 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
+#ifdef CONFIG_CMA_AGGRESSIVE
+ {
+ .procname = "cma-aggressive-switch",
+ .data = &cma_aggressive_switch,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "cma-aggressive-free-min",
+ .data = &cma_aggressive_free_min,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0600,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "cma-aggressive-shrink-switch",
+ .data = &cma_aggressive_shrink_switch,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec,
+ },
+#endif
{ }
};
diff --git a/mm/cma.c b/mm/cma.c
index 963bc4a..566ed5f 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -33,6 +33,7 @@
#include <linux/log2.h>
#include <linux/cma.h>
#include <linux/highmem.h>
+#include <linux/swap.h>
struct cma {
unsigned long base_pfn;
@@ -127,6 +128,27 @@ err:
return -EINVAL;
}
+#ifdef CONFIG_CMA_AGGRESSIVE
+/* The counter for the dma_alloc_from_contiguous and
+ dma_release_from_contiguous. */
+atomic_t cma_alloc_counter = ATOMIC_INIT(0);
+
+/* Swich of CMA_AGGRESSIVE. */
+int cma_aggressive_switch __read_mostly;
+
+/* If the number of CMA free pages is small than this value, CMA_AGGRESSIVE will
+ not work. */
+#ifdef CONFIG_CMA_AGGRESSIVE_FREE_MIN
+unsigned long cma_aggressive_free_min __read_mostly =
+ CONFIG_CMA_AGGRESSIVE_FREE_MIN;
+#else
+unsigned long cma_aggressive_free_min __read_mostly = 500;
+#endif
+
+/* Swich of CMA_AGGRESSIVE shink. */
+int cma_aggressive_shrink_switch __read_mostly;
+#endif
+
static int __init cma_init_reserved_areas(void)
{
int i;
@@ -138,6 +160,22 @@ static int __init cma_init_reserved_areas(void)
return ret;
}
+#ifdef CONFIG_CMA_AGGRESSIVE
+ cma_aggressive_switch = 0;
+#ifdef CONFIG_CMA_AGGRESSIVE_PHY_MAX
+ if (memblock_phys_mem_size() <= CONFIG_CMA_AGGRESSIVE_PHY_MAX)
+#else
+ if (memblock_phys_mem_size() <= 0x40000000)
+#endif
+ cma_aggressive_switch = 1;
+
+ cma_aggressive_shrink_switch = 0;
+#ifdef CONFIG_CMA_AGGRESSIVE_SHRINK
+ if (cma_aggressive_switch)
+ cma_aggressive_shrink_switch = 1;
+#endif
+#endif
+
return 0;
}
core_initcall(cma_init_reserved_areas);
@@ -312,6 +350,11 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
unsigned long bitmap_maxno, bitmap_no, bitmap_count;
struct page *page = NULL;
int ret;
+#ifdef CONFIG_CMA_AGGRESSIVE
+ int free = global_page_state(NR_FREE_PAGES)
+ - global_page_state(NR_FREE_CMA_PAGES)
+ - totalreserve_pages;
+#endif
if (!cma || !cma->count)
return NULL;
@@ -326,6 +369,13 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
bitmap_maxno = cma_bitmap_maxno(cma);
bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+#ifdef CONFIG_CMA_AGGRESSIVE
+ atomic_inc(&cma_alloc_counter);
+ if (cma_aggressive_switch && cma_aggressive_shrink_switch
+ && free < count)
+ shrink_all_memory(count - free, false);
+#endif
+
for (;;) {
mutex_lock(&cma->lock);
bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
@@ -361,6 +411,10 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
start = bitmap_no + mask + 1;
}
+#ifdef CONFIG_CMA_AGGRESSIVE
+ atomic_dec(&cma_alloc_counter);
+#endif
+
pr_debug("%s(): returned %p\n", __func__, page);
return page;
}
--
1.9.1
On Thu, Oct 16, 2014 at 11:35 AM, Hui Zhu <[email protected]> wrote:
> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
> MIGRATE_MOVABLE.
> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
> order that Linux kernel want.
>
> If a system that has a lot of user space program is running, for
> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
> allocated. Before function __rmqueue_fallback get memory from
> MIGRATE_CMA, the oom_killer will kill a task to release memory when
> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
> kernel kill some tasks to release memory.
This description isn't very clear to me: what issue are you trying to solve?
Making MIGRATE_CMA a fallback for MIGRATE_UNMOVABLE?
> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
> be more aggressive about allocation.
> If function CMA_AGGRESSIVE is available, when Linux kernel call function
> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
> doesn't have enough pages for allocation, go back to allocate memory from
> MIGRATE_MOVABLE.
I don't think so. That will cause MIGRATE_CMA to be depleted prematurely, and
when a user (such as a camera) wants CMA memory, it will not get the memory it needs.
> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
I don't think this is the root cause of the OOM.
But I am interested in the CMA shrinker idea, so I will follow this thread.
Thanks for your work; adding some test data would make it better.
[CC list trimmed]
On Thursday, October 16, 2014 11:35:49 AM Hui Zhu wrote:
> Function shrink_all_memory try to free `nr_to_reclaim' of memory.
> CMA_AGGRESSIVE_SHRINK function will call this functon to free `nr_to_reclaim' of
> memory. It need different scan_control with current caller function
> hibernate_preallocate_memory.
>
> If hibernation is true, the caller is hibernate_preallocate_memory.
> if not, the caller is CMA alloc function.
>
> Signed-off-by: Hui Zhu <[email protected]>
> ---
> include/linux/swap.h | 3 ++-
> kernel/power/snapshot.c | 2 +-
> mm/vmscan.c | 19 +++++++++++++------
> 3 files changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 37a585b..9f2cb43 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -335,7 +335,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
> gfp_t gfp_mask, bool noswap,
> struct zone *zone,
> unsigned long *nr_scanned);
> -extern unsigned long shrink_all_memory(unsigned long nr_pages);
> +extern unsigned long shrink_all_memory(unsigned long nr_pages,
> + bool hibernation);
> extern int vm_swappiness;
> extern int remove_mapping(struct address_space *mapping, struct page *page);
> extern unsigned long vm_total_pages;
> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
> index 791a618..a00fc35 100644
> --- a/kernel/power/snapshot.c
> +++ b/kernel/power/snapshot.c
> @@ -1657,7 +1657,7 @@ int hibernate_preallocate_memory(void)
> * NOTE: If this is not done, performance will be hurt badly in some
> * test cases.
> */
> - shrink_all_memory(saveable - size);
> + shrink_all_memory(saveable - size, true);
Instead of doing this, can you please define
__shrink_all_memory()
that will take the appropriate struct scan_control as an argument and
then define two wrappers around that, one for hibernation and one for CMA?
The way you did it opens a field for bugs caused by passing a wrong value
as the second argument.
>
> /*
> * The number of saveable pages in memory was too high, so apply some
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index dcb4707..fdcfa30 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3404,7 +3404,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
> wake_up_interruptible(&pgdat->kswapd_wait);
> }
>
> -#ifdef CONFIG_HIBERNATION
> +#if defined CONFIG_HIBERNATION || defined CONFIG_CMA_AGGRESSIVE
> /*
> * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
> * freed pages.
> @@ -3413,22 +3413,29 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
> * LRU order by reclaiming preferentially
> * inactive > active > active referenced > active mapped
> */
> -unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
> +unsigned long shrink_all_memory(unsigned long nr_to_reclaim, bool hibernation)
> {
> struct reclaim_state reclaim_state;
> struct scan_control sc = {
> .nr_to_reclaim = nr_to_reclaim,
> - .gfp_mask = GFP_HIGHUSER_MOVABLE,
> .priority = DEF_PRIORITY,
> - .may_writepage = 1,
> .may_unmap = 1,
> .may_swap = 1,
> - .hibernation_mode = 1,
> };
> struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
> struct task_struct *p = current;
> unsigned long nr_reclaimed;
>
> + if (hibernation) {
> + sc.hibernation_mode = 1;
> + sc.may_writepage = 1;
> + sc.gfp_mask = GFP_HIGHUSER_MOVABLE;
> + } else {
> + sc.hibernation_mode = 0;
> + sc.may_writepage = !laptop_mode;
> + sc.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_HIGHMEM;
> + }
> +
> p->flags |= PF_MEMALLOC;
> lockdep_set_current_reclaim_state(sc.gfp_mask);
> reclaim_state.reclaimed_slab = 0;
> @@ -3442,7 +3449,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
>
> return nr_reclaimed;
> }
> -#endif /* CONFIG_HIBERNATION */
> +#endif /* CONFIG_HIBERNATION || CONFIG_CMA_AGGRESSIVE */
>
> /* It's optimal to keep kswapds on the same CPUs as their memory, but
> not required for correctness. So if the last cpu in a node goes
>
--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
On 10/15/2014 8:35 PM, Hui Zhu wrote:
> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
> MIGRATE_MOVABLE.
> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
> order that Linux kernel want.
>
> If a system that has a lot of user space program is running, for
> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
> allocated. Before function __rmqueue_fallback get memory from
> MIGRATE_CMA, the oom_killer will kill a task to release memory when
> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
> kernel kill some tasks to release memory.
>
> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
> be more aggressive about allocation.
> If function CMA_AGGRESSIVE is available, when Linux kernel call function
> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
> doesn't have enough pages for allocation, go back to allocate memory from
> MIGRATE_MOVABLE.
> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
>
It's good to see another proposal to fix CMA utilization. Do you have
any data about the success rate of CMA contiguous allocation after
this patch series? I played around with a similar approach of using
CMA for MIGRATE_MOVABLE allocations and found that although utilization
did increase, contiguous allocations failed at a higher rate and were
much slower. I see what this series is trying to do with avoiding
allocation from CMA pages when a contiguous allocation is in progress.
My concern is that there would still be problems with contiguous
allocation after all the MIGRATE_MOVABLE fallback has happened.
Thanks,
Laura
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation
On 10/16/14 16:29, Rafael J. Wysocki wrote:
> [CC list trimmed]
>
> On Thursday, October 16, 2014 11:35:49 AM Hui Zhu wrote:
>> Function shrink_all_memory try to free `nr_to_reclaim' of memory.
>> CMA_AGGRESSIVE_SHRINK function will call this functon to free `nr_to_reclaim' of
>> memory. It need different scan_control with current caller function
>> hibernate_preallocate_memory.
>>
>> If hibernation is true, the caller is hibernate_preallocate_memory.
>> if not, the caller is CMA alloc function.
>>
>> Signed-off-by: Hui Zhu <[email protected]>
>> ---
>> include/linux/swap.h | 3 ++-
>> kernel/power/snapshot.c | 2 +-
>> mm/vmscan.c | 19 +++++++++++++------
>> 3 files changed, 16 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index 37a585b..9f2cb43 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -335,7 +335,8 @@ extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
>> gfp_t gfp_mask, bool noswap,
>> struct zone *zone,
>> unsigned long *nr_scanned);
>> -extern unsigned long shrink_all_memory(unsigned long nr_pages);
>> +extern unsigned long shrink_all_memory(unsigned long nr_pages,
>> + bool hibernation);
>> extern int vm_swappiness;
>> extern int remove_mapping(struct address_space *mapping, struct page *page);
>> extern unsigned long vm_total_pages;
>> diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
>> index 791a618..a00fc35 100644
>> --- a/kernel/power/snapshot.c
>> +++ b/kernel/power/snapshot.c
>> @@ -1657,7 +1657,7 @@ int hibernate_preallocate_memory(void)
>> * NOTE: If this is not done, performance will be hurt badly in some
>> * test cases.
>> */
>> - shrink_all_memory(saveable - size);
>> + shrink_all_memory(saveable - size, true);
>
> Instead of doing this, can you please define
>
> __shrink_all_memory()
>
> that will take the appropriate struct scan_control as an argument and
> then define two wrappers around that, one for hibernation and one for CMA?
>
> The way you did it opens a field for bugs caused by passing a wrong value
> as the second argument.
Thanks Rafael.
I will update patch according to your comments.
Best,
Hui
>
>>
>> /*
>> * The number of saveable pages in memory was too high, so apply some
>> diff --git a/mm/vmscan.c b/mm/vmscan.c
>> index dcb4707..fdcfa30 100644
>> --- a/mm/vmscan.c
>> +++ b/mm/vmscan.c
>> @@ -3404,7 +3404,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
>> wake_up_interruptible(&pgdat->kswapd_wait);
>> }
>>
>> -#ifdef CONFIG_HIBERNATION
>> +#if defined CONFIG_HIBERNATION || defined CONFIG_CMA_AGGRESSIVE
>> /*
>> * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
>> * freed pages.
>> @@ -3413,22 +3413,29 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
>> * LRU order by reclaiming preferentially
>> * inactive > active > active referenced > active mapped
>> */
>> -unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
>> +unsigned long shrink_all_memory(unsigned long nr_to_reclaim, bool hibernation)
>> {
>> struct reclaim_state reclaim_state;
>> struct scan_control sc = {
>> .nr_to_reclaim = nr_to_reclaim,
>> - .gfp_mask = GFP_HIGHUSER_MOVABLE,
>> .priority = DEF_PRIORITY,
>> - .may_writepage = 1,
>> .may_unmap = 1,
>> .may_swap = 1,
>> - .hibernation_mode = 1,
>> };
>> struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
>> struct task_struct *p = current;
>> unsigned long nr_reclaimed;
>>
>> + if (hibernation) {
>> + sc.hibernation_mode = 1;
>> + sc.may_writepage = 1;
>> + sc.gfp_mask = GFP_HIGHUSER_MOVABLE;
>> + } else {
>> + sc.hibernation_mode = 0;
>> + sc.may_writepage = !laptop_mode;
>> + sc.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_HIGHMEM;
>> + }
>> +
>> p->flags |= PF_MEMALLOC;
>> lockdep_set_current_reclaim_state(sc.gfp_mask);
>> reclaim_state.reclaimed_slab = 0;
>> @@ -3442,7 +3449,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
>>
>> return nr_reclaimed;
>> }
>> -#endif /* CONFIG_HIBERNATION */
>> +#endif /* CONFIG_HIBERNATION || CONFIG_CMA_AGGRESSIVE */
>>
>> /* It's optimal to keep kswapds on the same CPUs as their memory, but
>> not required for correctness. So if the last cpu in a node goes
>>
>
On 10/16/14 16:56, Laura Abbott wrote:
> On 10/15/2014 8:35 PM, Hui Zhu wrote:
>> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
>> MIGRATE_MOVABLE.
>> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
>> order that Linux kernel want.
>>
>> If a system that has a lot of user space program is running, for
>> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
>> allocated. Before function __rmqueue_fallback get memory from
>> MIGRATE_CMA, the oom_killer will kill a task to release memory when
>> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
>> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
>> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
>> kernel kill some tasks to release memory.
>>
>> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
>> be more aggressive about allocation.
>> If function CMA_AGGRESSIVE is available, when Linux kernel call function
>> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
>> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
>> doesn't have enough pages for allocation, go back to allocate memory from
>> MIGRATE_MOVABLE.
>> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
>> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
>>
>
> It's good to see another proposal to fix CMA utilization.
Thanks Laura.
> Do you have
> any data about the success rate of CMA contiguous allocation after
> this patch series? I played around with a similar approach of using
> CMA for MIGRATE_MOVABLE allocations and found that although utilization
> did increase, contiguous allocations failed at a higher rate and were
> much slower. I see what this series is trying to do with avoiding
> allocation from CMA pages when a contiguous allocation is progress.
> My concern is that there would still be problems with contiguous
> allocation after all the MIGRATE_MOVABLE fallback has happened.
I did some tests with cma_alloc_counter and cma-aggressive-shrink on an
Android board that has 1 GB of memory. I ran some apps to bring free CMA
close to the value of cma_aggressive_free_min (500 pages), then had a
driver request CMA more than 10 times, each time requesting more than
3000 pages.
I don't have an established number because it is really hard to get a
failure; I think the success rate is at least 95%.
And I think the isolation failures may be related to the page alloc and
free code. Maybe letting zone->lock protect more code could handle this issue.
Thanks,
Hui
>
> Thanks,
> Laura
>
Update this patch according to the comments from Rafael.
Function shrink_all_memory_for_cma() tries to free `nr_to_reclaim' pages of
memory. The CMA aggressive shrink code will call this function to free
`nr_to_reclaim' pages of memory.
Signed-off-by: Hui Zhu <[email protected]>
---
mm/vmscan.c | 58 +++++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 43 insertions(+), 15 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dcb4707..658dc8d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3404,6 +3404,28 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
wake_up_interruptible(&pgdat->kswapd_wait);
}
+#if defined CONFIG_HIBERNATION || defined CONFIG_CMA_AGGRESSIVE
+static unsigned long __shrink_all_memory(struct scan_control *sc)
+{
+ struct reclaim_state reclaim_state;
+ struct zonelist *zonelist = node_zonelist(numa_node_id(), sc->gfp_mask);
+ struct task_struct *p = current;
+ unsigned long nr_reclaimed;
+
+ p->flags |= PF_MEMALLOC;
+ lockdep_set_current_reclaim_state(sc->gfp_mask);
+ reclaim_state.reclaimed_slab = 0;
+ p->reclaim_state = &reclaim_state;
+
+ nr_reclaimed = do_try_to_free_pages(zonelist, sc);
+
+ p->reclaim_state = NULL;
+ lockdep_clear_current_reclaim_state();
+ p->flags &= ~PF_MEMALLOC;
+
+ return nr_reclaimed;
+}
+
#ifdef CONFIG_HIBERNATION
/*
* Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
@@ -3415,7 +3437,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
*/
unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
{
- struct reclaim_state reclaim_state;
struct scan_control sc = {
.nr_to_reclaim = nr_to_reclaim,
.gfp_mask = GFP_HIGHUSER_MOVABLE,
@@ -3425,24 +3446,31 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
.may_swap = 1,
.hibernation_mode = 1,
};
- struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
- struct task_struct *p = current;
- unsigned long nr_reclaimed;
-
- p->flags |= PF_MEMALLOC;
- lockdep_set_current_reclaim_state(sc.gfp_mask);
- reclaim_state.reclaimed_slab = 0;
- p->reclaim_state = &reclaim_state;
- nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+ return __shrink_all_memory(&sc);
+}
+#endif /* CONFIG_HIBERNATION */
- p->reclaim_state = NULL;
- lockdep_clear_current_reclaim_state();
- p->flags &= ~PF_MEMALLOC;
+#ifdef CONFIG_CMA_AGGRESSIVE
+/*
+ * Try to free `nr_to_reclaim' of memory, system-wide, for CMA aggressive
+ * shrink function.
+ */
+void shrink_all_memory_for_cma(unsigned long nr_to_reclaim)
+{
+ struct scan_control sc = {
+ .nr_to_reclaim = nr_to_reclaim,
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_HIGHMEM,
+ .priority = DEF_PRIORITY,
+ .may_writepage = !laptop_mode,
+ .may_unmap = 1,
+ .may_swap = 1,
+ };
- return nr_reclaimed;
+ __shrink_all_memory(&sc);
}
-#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_CMA_AGGRESSIVE */
+#endif /* CONFIG_HIBERNATION || CONFIG_CMA_AGGRESSIVE */
/* It's optimal to keep kswapds on the same CPUs as their memory, but
not required for correctness. So if the last cpu in a node goes
--
1.9.1
Update this patch according to the comments from Rafael.
Add cma_alloc_counter, cma_aggressive_switch, cma_aggressive_free_min and
cma_aggressive_shrink_switch.
cma_aggressive_switch is the switch for the whole CMA_AGGRESSIVE feature. It
can be controlled by sysctl "vm.cma-aggressive-switch".
cma_aggressive_free_min can be controlled by sysctl
"vm.cma-aggressive-free-min". If the number of free CMA pages is smaller than
this value, CMA_AGGRESSIVE will not take effect in the page allocator.
cma_aggressive_shrink_switch can be controlled by sysctl
"vm.cma-aggressive-shrink-switch". If it is true and the amount of free normal
memory is smaller than the requested allocation size, memory is shrunk with
shrink_all_memory_for_cma() before a driver allocates pages from CMA.
When the kernel starts to reserve a custom contiguous area, cma_alloc_counter
is incremented and CMA_AGGRESSIVE stops taking effect in the page allocator.
When the reservation function returns, cma_alloc_counter is decremented.
Signed-off-by: Hui Zhu <[email protected]>
---
include/linux/cma.h | 7 +++++++
kernel/sysctl.c | 27 +++++++++++++++++++++++++++
mm/cma.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 88 insertions(+)
diff --git a/include/linux/cma.h b/include/linux/cma.h
index 0430ed0..df96abf 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -15,6 +15,13 @@
struct cma;
+#ifdef CONFIG_CMA_AGGRESSIVE
+extern atomic_t cma_alloc_counter;
+extern int cma_aggressive_switch;
+extern unsigned long cma_aggressive_free_min;
+extern int cma_aggressive_shrink_switch;
+#endif
+
extern phys_addr_t cma_get_base(struct cma *cma);
extern unsigned long cma_get_size(struct cma *cma);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4aada6d..646929e2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -92,6 +92,10 @@
#include <linux/nmi.h>
#endif
+#ifdef CONFIG_CMA_AGGRESSIVE
+#include <linux/cma.h>
+#endif
+
#if defined(CONFIG_SYSCTL)
@@ -1485,6 +1489,29 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
},
+#ifdef CONFIG_CMA_AGGRESSIVE
+ {
+ .procname = "cma-aggressive-switch",
+ .data = &cma_aggressive_switch,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "cma-aggressive-free-min",
+ .data = &cma_aggressive_free_min,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0600,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "cma-aggressive-shrink-switch",
+ .data = &cma_aggressive_shrink_switch,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec,
+ },
+#endif
{ }
};
diff --git a/mm/cma.c b/mm/cma.c
index 963bc4a..1cf341c 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -33,6 +33,7 @@
#include <linux/log2.h>
#include <linux/cma.h>
#include <linux/highmem.h>
+#include <linux/swap.h>
struct cma {
unsigned long base_pfn;
@@ -127,6 +128,27 @@ err:
return -EINVAL;
}
+#ifdef CONFIG_CMA_AGGRESSIVE
+/* The counter for the dma_alloc_from_contiguous and
+ dma_release_from_contiguous. */
+atomic_t cma_alloc_counter = ATOMIC_INIT(0);
+
+/* Swich of CMA_AGGRESSIVE. */
+int cma_aggressive_switch __read_mostly;
+
+/* If the number of CMA free pages is small than this value, CMA_AGGRESSIVE will
+ not work. */
+#ifdef CONFIG_CMA_AGGRESSIVE_FREE_MIN
+unsigned long cma_aggressive_free_min __read_mostly =
+ CONFIG_CMA_AGGRESSIVE_FREE_MIN;
+#else
+unsigned long cma_aggressive_free_min __read_mostly = 500;
+#endif
+
+/* Swich of CMA_AGGRESSIVE shink. */
+int cma_aggressive_shrink_switch __read_mostly;
+#endif
+
static int __init cma_init_reserved_areas(void)
{
int i;
@@ -138,6 +160,22 @@ static int __init cma_init_reserved_areas(void)
return ret;
}
+#ifdef CONFIG_CMA_AGGRESSIVE
+ cma_aggressive_switch = 0;
+#ifdef CONFIG_CMA_AGGRESSIVE_PHY_MAX
+ if (memblock_phys_mem_size() <= CONFIG_CMA_AGGRESSIVE_PHY_MAX)
+#else
+ if (memblock_phys_mem_size() <= 0x40000000)
+#endif
+ cma_aggressive_switch = 1;
+
+ cma_aggressive_shrink_switch = 0;
+#ifdef CONFIG_CMA_AGGRESSIVE_SHRINK
+ if (cma_aggressive_switch)
+ cma_aggressive_shrink_switch = 1;
+#endif
+#endif
+
return 0;
}
core_initcall(cma_init_reserved_areas);
@@ -312,6 +350,11 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
unsigned long bitmap_maxno, bitmap_no, bitmap_count;
struct page *page = NULL;
int ret;
+#ifdef CONFIG_CMA_AGGRESSIVE
+ int free = global_page_state(NR_FREE_PAGES)
+ - global_page_state(NR_FREE_CMA_PAGES)
+ - totalreserve_pages;
+#endif
if (!cma || !cma->count)
return NULL;
@@ -326,6 +369,13 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
bitmap_maxno = cma_bitmap_maxno(cma);
bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+#ifdef CONFIG_CMA_AGGRESSIVE
+ atomic_inc(&cma_alloc_counter);
+ if (cma_aggressive_switch && cma_aggressive_shrink_switch
+ && free < count)
+ shrink_all_memory_for_cma(count - free);
+#endif
+
for (;;) {
mutex_lock(&cma->lock);
bitmap_no = bitmap_find_next_zero_area(cma->bitmap,
@@ -361,6 +411,10 @@ struct page *cma_alloc(struct cma *cma, int count, unsigned int align)
start = bitmap_no + mask + 1;
}
+#ifdef CONFIG_CMA_AGGRESSIVE
+ atomic_dec(&cma_alloc_counter);
+#endif
+
pr_debug("%s(): returned %p\n", __func__, page);
return page;
}
--
1.9.1
Hi!
> Add CMA_AGGRESSIVE config that depend on CMA to Linux kernel config.
> Add CMA_AGGRESSIVE_PHY_MAX, CMA_AGGRESSIVE_FREE_MIN and CMA_AGGRESSIVE_SHRINK
> that depend on CMA_AGGRESSIVE.
>
> If physical memory size (not include CMA memory) in byte less than or equal to
> CMA_AGGRESSIVE_PHY_MAX, CMA aggressive switch (sysctl vm.cma-aggressive-switch)
> will be opened.
Ok...
Do I understand it correctly that there is some problem with
hibernation not working on machines with big CMA areas...?
But adding 4 config options that the end user has no chance to set
correctly cannot be the best solution, can it?
> +config CMA_AGGRESSIVE_PHY_MAX
> + hex "Physical memory size in Bytes that auto turn on the CMA aggressive switch"
> + depends on CMA_AGGRESSIVE
> + default 0x40000000
> + help
> + If physical memory size (not include CMA memory) in byte less than or
> + equal to this value, CMA aggressive switch will be opened.
> + After the Linux boot, sysctl "vm.cma-aggressive-switch" can control
> + the CMA AGGRESSIVE switch.
For example... how am I expected to figure out the right value to place here?
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
On 10/22/14 09:27, Pavel Machek wrote:
> Hi!
>
>> Add CMA_AGGRESSIVE config that depend on CMA to Linux kernel config.
>> Add CMA_AGGRESSIVE_PHY_MAX, CMA_AGGRESSIVE_FREE_MIN and CMA_AGGRESSIVE_SHRINK
>> that depend on CMA_AGGRESSIVE.
>>
>> If physical memory size (not include CMA memory) in byte less than or equal to
>> CMA_AGGRESSIVE_PHY_MAX, CMA aggressive switch (sysctl vm.cma-aggressive-switch)
>> will be opened.
>
> Ok...
>
> Do I understand it correctly that there is some problem with
> hibernation not working on machines not working on machines with big
> CMA areas...?
No, these patches try to handle the issue that most CMA memory is still
unallocated when lowmemorykiller or the oom_killer begins to kill tasks.
>
> But adding 4 config options end-user has no chance to set right can
> not be the best solution, can it?
>
>> +config CMA_AGGRESSIVE_PHY_MAX
>> + hex "Physical memory size in Bytes that auto turn on the CMA aggressive switch"
>> + depends on CMA_AGGRESSIVE
>> + default 0x40000000
>> + help
>> + If physical memory size (not include CMA memory) in byte less than or
>> + equal to this value, CMA aggressive switch will be opened.
>> + After the Linux boot, sysctl "vm.cma-aggressive-switch" can control
>> + the CMA AGGRESSIVE switch.
>
> For example... how am I expected to figure right value to place here?
I agree with that. I will change this config to be set automatically in the next version.
Thanks,
Hui
>
> Pavel
>
On 10/16/2014 04:55 AM, Laura Abbott wrote:
> On 10/15/2014 8:35 PM, Hui Zhu wrote:
>> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
>> MIGRATE_MOVABLE.
>> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
>> order that Linux kernel want.
>>
>> If a system that has a lot of user space program is running, for
>> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
>> allocated. Before function __rmqueue_fallback get memory from
>> MIGRATE_CMA, the oom_killer will kill a task to release memory when
>> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
>> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
>> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
>> kernel kill some tasks to release memory.
>>
>> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
>> be more aggressive about allocation.
>> If function CMA_AGGRESSIVE is available, when Linux kernel call function
>> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
>> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
>> doesn't have enough pages for allocation, go back to allocate memory from
>> MIGRATE_MOVABLE.
>> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
>> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
>>
>
> It's good to see another proposal to fix CMA utilization. Do you have
> any data about the success rate of CMA contiguous allocation after
> this patch series? I played around with a similar approach of using
> CMA for MIGRATE_MOVABLE allocations and found that although utilization
> did increase, contiguous allocations failed at a higher rate and were
> much slower. I see what this series is trying to do with avoiding
> allocation from CMA pages when a contiguous allocation is progress.
> My concern is that there would still be problems with contiguous
> allocation after all the MIGRATE_MOVABLE fallback has happened.
What impact does this series have on x86 platforms now that CMA is the
backup allocator for all iommu dma allocations?
Regards,
Peter Hurley
On 10/22/14 20:02, Peter Hurley wrote:
> On 10/16/2014 04:55 AM, Laura Abbott wrote:
>> On 10/15/2014 8:35 PM, Hui Zhu wrote:
>>> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
>>> MIGRATE_MOVABLE.
>>> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
>>> order that Linux kernel want.
>>>
>>> If a system that has a lot of user space program is running, for
>>> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
>>> allocated. Before function __rmqueue_fallback get memory from
>>> MIGRATE_CMA, the oom_killer will kill a task to release memory when
>>> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
>>> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
>>> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
>>> kernel kill some tasks to release memory.
>>>
>>> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
>>> be more aggressive about allocation.
>>> If function CMA_AGGRESSIVE is available, when Linux kernel call function
>>> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
>>> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
>>> doesn't have enough pages for allocation, go back to allocate memory from
>>> MIGRATE_MOVABLE.
>>> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
>>> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
>>>
>>
>> It's good to see another proposal to fix CMA utilization. Do you have
>> any data about the success rate of CMA contiguous allocation after
>> this patch series? I played around with a similar approach of using
>> CMA for MIGRATE_MOVABLE allocations and found that although utilization
>> did increase, contiguous allocations failed at a higher rate and were
>> much slower. I see what this series is trying to do with avoiding
>> allocation from CMA pages when a contiguous allocation is progress.
>> My concern is that there would still be problems with contiguous
>> allocation after all the MIGRATE_MOVABLE fallback has happened.
>
> What impact does this series have on x86 platforms now that CMA is the
> backup allocator for all iommu dma allocations?
They will not affect driver CMA memory allocation.
Thanks,
Hui
>
> Regards,
> Peter Hurley
>
On Thu, Oct 16, 2014 at 11:35:47AM +0800, Hui Zhu wrote:
> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
> MIGRATE_MOVABLE.
> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
> order that Linux kernel want.
>
> If a system that has a lot of user space program is running, for
> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
> allocated. Before function __rmqueue_fallback get memory from
> MIGRATE_CMA, the oom_killer will kill a task to release memory when
> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
> kernel kill some tasks to release memory.
>
> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
> be more aggressive about allocation.
> If function CMA_AGGRESSIVE is available, when Linux kernel call function
> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
> doesn't have enough pages for allocation, go back to allocate memory from
> MIGRATE_MOVABLE.
> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
Hello,
I did some work similar to this.
Please see the following links.
https://lkml.org/lkml/2014/5/28/64
https://lkml.org/lkml/2014/5/28/57
Also, aggressive allocation should be postponed until the freepage counting
bug is fixed, because aggressive allocation enlarges the possibility of the
problem occurring. I tried to fix that bug, too; see the following link.
https://lkml.org/lkml/2014/10/23/90
Thanks.
On Thu, Oct 16, 2014 at 11:35:51AM +0800, Hui Zhu wrote:
> If page alloc function __rmqueue try to get pages from MIGRATE_MOVABLE and
> conditions (cma_alloc_counter, cma_aggressive_free_min, cma_alloc_counter)
> allow, MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first.
>
> Signed-off-by: Hui Zhu <[email protected]>
> ---
> mm/page_alloc.c | 42 +++++++++++++++++++++++++++++++-----------
> 1 file changed, 31 insertions(+), 11 deletions(-)
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 736d8e1..87bc326 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -65,6 +65,10 @@
> #include <asm/div64.h>
> #include "internal.h"
>
> +#ifdef CONFIG_CMA_AGGRESSIVE
> +#include <linux/cma.h>
> +#endif
> +
> /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
> static DEFINE_MUTEX(pcp_batch_high_lock);
> #define MIN_PERCPU_PAGELIST_FRACTION (8)
> @@ -1189,20 +1193,36 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
> {
> struct page *page;
>
> -retry_reserve:
> +#ifdef CONFIG_CMA_AGGRESSIVE
> + if (cma_aggressive_switch
> + && migratetype == MIGRATE_MOVABLE
> + && atomic_read(&cma_alloc_counter) == 0
> + && global_page_state(NR_FREE_CMA_PAGES) > cma_aggressive_free_min
> + + (1 << order))
> + migratetype = MIGRATE_CMA;
> +#endif
> +retry:
I don't get why cma_alloc_counter should be tested.
When a CMA allocation is in progress, the pageblock is isolated so that pages
on that pageblock cannot be allocated. Why should we prevent aggressive
allocation in this case?
Thanks.
On 10/16/2014 10:55 AM, Laura Abbott wrote:
> On 10/15/2014 8:35 PM, Hui Zhu wrote:
>
> It's good to see another proposal to fix CMA utilization. Do you have
> any data about the success rate of CMA contiguous allocation after
> this patch series? I played around with a similar approach of using
> CMA for MIGRATE_MOVABLE allocations and found that although utilization
> did increase, contiguous allocations failed at a higher rate and were
> much slower. I see what this series is trying to do with avoiding
> allocation from CMA pages when a contiguous allocation is progress.
> My concern is that there would still be problems with contiguous
> allocation after all the MIGRATE_MOVABLE fallback has happened.
Hi,
did anyone try/suggest the following idea?
- keep CMA as fallback to MOVABLE as it is now, i.e. non-aggressive
- when UNMOVABLE (RECLAIMABLE also?) allocation fails and CMA pageblocks
have space, don't OOM immediately, but first try to migrate some MOVABLE
pages to CMA pageblocks, to make space for the UNMOVABLE allocation in
non-CMA pageblocks
- this should keep CMA pageblocks free as long as possible and useful
for CMA allocations, but without restricting the non-MOVABLE allocations
even though there is free memory (but in CMA pageblocks)
- the fact that a MOVABLE page could be successfully migrated to CMA
pageblock, means it was not pinned or otherwise non-migratable, so
there's a good chance it can be migrated back again if CMA pageblocks
need to be used by CMA allocation
- it's more complex, but I guess we have most of the necessary
infrastructure in compaction already :)
Thoughts?
Vlastimil
> Thanks,
> Laura
>
On Fri, Oct 24, 2014 at 1:25 PM, Joonsoo Kim <[email protected]> wrote:
> On Thu, Oct 16, 2014 at 11:35:47AM +0800, Hui Zhu wrote:
>> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
>> MIGRATE_MOVABLE.
>> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
>> order that Linux kernel want.
>>
>> If a system that has a lot of user space program is running, for
>> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
>> allocated. Before function __rmqueue_fallback get memory from
>> MIGRATE_CMA, the oom_killer will kill a task to release memory when
>> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
>> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
>> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
>> kernel kill some tasks to release memory.
>>
>> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
>> be more aggressive about allocation.
>> If function CMA_AGGRESSIVE is available, when Linux kernel call function
>> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
>> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
>> doesn't have enough pages for allocation, go back to allocate memory from
>> MIGRATE_MOVABLE.
>> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
>> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
>
> Hello,
>
> I did some work similar to this.
> Please reference following links.
>
> https://lkml.org/lkml/2014/5/28/64
> https://lkml.org/lkml/2014/5/28/57
> I tested #1 approach and found the problem. Although free memory on
> meminfo can move around low watermark, there is large fluctuation on free
> memory, because too many pages are reclaimed when kswapd is invoked.
> Reason for this behaviour is that successive allocated CMA pages are
> on the LRU list in that order and kswapd reclaim them in same order.
> These memory doesn't help watermark checking from kwapd, so too many
> pages are reclaimed, I guess.
This issue can be handled with some changes around the shrink code. I am
trying to integrate a patch for that.
But I am not sure we hit the same issue. Would you mind giving me more
info about this part?
>
> And, aggressive allocation should be postponed until freepage counting
> bug is fixed, because aggressive allocation enlarge the possiblity
> of problem occurence. I tried to fix that bug, too. See following link.
>
> https://lkml.org/lkml/2014/10/23/90
I am following these patches. They are great! Thanks for your work.
Best,
Hui
>
> Thanks.
>
On Mon, Nov 03, 2014 at 03:28:38PM +0800, Hui Zhu wrote:
> On Fri, Oct 24, 2014 at 1:25 PM, Joonsoo Kim <[email protected]> wrote:
> > On Thu, Oct 16, 2014 at 11:35:47AM +0800, Hui Zhu wrote:
> >> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
> >> MIGRATE_MOVABLE.
> >> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
> >> order that Linux kernel want.
> >>
> >> If a system that has a lot of user space program is running, for
> >> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
> >> allocated. Before function __rmqueue_fallback get memory from
> >> MIGRATE_CMA, the oom_killer will kill a task to release memory when
> >> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
> >> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
> >> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
> >> kernel kill some tasks to release memory.
> >>
> >> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
> >> be more aggressive about allocation.
> >> If function CMA_AGGRESSIVE is available, when Linux kernel call function
> >> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
> >> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
> >> doesn't have enough pages for allocation, go back to allocate memory from
> >> MIGRATE_MOVABLE.
> >> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
> >> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
> >
> > Hello,
> >
> > I did some work similar to this.
> > Please reference following links.
> >
> > https://lkml.org/lkml/2014/5/28/64
> > https://lkml.org/lkml/2014/5/28/57
>
> > I tested #1 approach and found the problem. Although free memory on
> > meminfo can move around low watermark, there is large fluctuation on free
> > memory, because too many pages are reclaimed when kswapd is invoked.
> > Reason for this behaviour is that successive allocated CMA pages are
> > on the LRU list in that order and kswapd reclaim them in same order.
> > These memory doesn't help watermark checking from kwapd, so too many
> > pages are reclaimed, I guess.
>
> This issue can be handle with some change around shrink code. I am
> trying to integrate a patch for them.
> But I am not sure we met the same issue. Do you mind give me more
> info about this part?
I forgot the issue because of the long time gap. I need some time to bring
the issue back to mind, and I will answer soon after some thinking.
>
> >
> > And, aggressive allocation should be postponed until freepage counting
> > bug is fixed, because aggressive allocation enlarge the possiblity
> > of problem occurence. I tried to fix that bug, too. See following link.
> >
> > https://lkml.org/lkml/2014/10/23/90
>
> I am following these patches. They are great! Thanks for your work.
Thanks. :)
On Wed, Oct 29, 2014 at 10:43 PM, Vlastimil Babka <[email protected]> wrote:
> On 10/16/2014 10:55 AM, Laura Abbott wrote:
>>
>> On 10/15/2014 8:35 PM, Hui Zhu wrote:
>>
>> It's good to see another proposal to fix CMA utilization. Do you have
>> any data about the success rate of CMA contiguous allocation after
>> this patch series? I played around with a similar approach of using
>> CMA for MIGRATE_MOVABLE allocations and found that although utilization
>> did increase, contiguous allocations failed at a higher rate and were
>> much slower. I see what this series is trying to do with avoiding
>> allocation from CMA pages when a contiguous allocation is progress.
>> My concern is that there would still be problems with contiguous
>> allocation after all the MIGRATE_MOVABLE fallback has happened.
>
>
> Hi,
>
> did anyone try/suggest the following idea?
>
> - keep CMA as fallback to MOVABLE as is is now, i.e. non-agressive
> - when UNMOVABLE (RECLAIMABLE also?) allocation fails and CMA pageblocks
> have space, don't OOM immediately, but first try to migrate some MOVABLE
> pages to CMA pageblocks, to make space for the UNMOVABLE allocation in
> non-CMA pageblocks
> - this should keep CMA pageblocks free as long as possible and useful for
> CMA allocations, but without restricting the non-MOVABLE allocations even
> though there is free memory (but in CMA pageblocks)
> - the fact that a MOVABLE page could be successfully migrated to CMA
> pageblock, means it was not pinned or otherwise non-migratable, so there's a
> good chance it can be migrated back again if CMA pageblocks need to be used
> by CMA allocation
> - it's more complex, but I guess we have most of the necessary
> infrastructure in compaction already :)
I think this idea makes the CMA allocation path more complex, but it makes
the balance and shrink code easier, because it turns CMA into ordinary,
fully usable memory.
I just worry about the speed of migrating memory with this idea. :)
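To make sure I read the idea correctly, here is a very rough sketch of it
(every helper name below is made up for illustration; this is not an actual
implementation):

/* Before falling back to the OOM killer for an UNMOVABLE request:
 * if free memory only remains in CMA pageblocks, migrate some MOVABLE
 * pages into them so that non-CMA pageblocks gain free pages that the
 * UNMOVABLE allocation can use. */
static struct page *try_migrate_movable_to_cma(struct zone *zone,
                                               unsigned int order)
{
        unsigned long needed = 1UL << order;

        if (zone_page_state(zone, NR_FREE_CMA_PAGES) < needed)
                return NULL;

        /* Made-up helper: would reuse compaction's migration machinery,
         * taking free target pages from CMA pageblocks. */
        if (migrate_movable_pages_into_cma(zone, needed) < needed)
                return NULL;

        /* Retry the normal (non-CMA) freelists for the request. */
        return __rmqueue_smallest(zone, order, MIGRATE_UNMOVABLE);
}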
Thanks,
Hui
>
> Thoughts?
> Vlastimil
>
>> Thanks,
>> Laura
>>
>
On Mon, Nov 03, 2014 at 05:05:46PM +0900, Joonsoo Kim wrote:
> On Mon, Nov 03, 2014 at 03:28:38PM +0800, Hui Zhu wrote:
> > On Fri, Oct 24, 2014 at 1:25 PM, Joonsoo Kim <[email protected]> wrote:
> > > On Thu, Oct 16, 2014 at 11:35:47AM +0800, Hui Zhu wrote:
> > >> In fallbacks of page_alloc.c, MIGRATE_CMA is the fallback of
> > >> MIGRATE_MOVABLE.
> > >> MIGRATE_MOVABLE will use MIGRATE_CMA when it doesn't have a page in
> > >> order that Linux kernel want.
> > >>
> > >> If a system that has a lot of user space program is running, for
> > >> instance, an Android board, most of memory is in MIGRATE_MOVABLE and
> > >> allocated. Before function __rmqueue_fallback get memory from
> > >> MIGRATE_CMA, the oom_killer will kill a task to release memory when
> > >> kernel want get MIGRATE_UNMOVABLE memory because fallbacks of
> > >> MIGRATE_UNMOVABLE are MIGRATE_RECLAIMABLE and MIGRATE_MOVABLE.
> > >> This status is odd. The MIGRATE_CMA has a lot free memory but Linux
> > >> kernel kill some tasks to release memory.
> > >>
> > >> This patch series adds a new function CMA_AGGRESSIVE to make CMA memory
> > >> be more aggressive about allocation.
> > >> If function CMA_AGGRESSIVE is available, when Linux kernel call function
> > >> __rmqueue try to get pages from MIGRATE_MOVABLE and conditions allow,
> > >> MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first. If MIGRATE_CMA
> > >> doesn't have enough pages for allocation, go back to allocate memory from
> > >> MIGRATE_MOVABLE.
> > >> Then the memory of MIGRATE_MOVABLE can be kept for MIGRATE_UNMOVABLE and
> > >> MIGRATE_RECLAIMABLE which doesn't have fallback MIGRATE_CMA.
> > >
> > > Hello,
> > >
> > > I did some work similar to this.
> > > Please reference following links.
> > >
> > > https://lkml.org/lkml/2014/5/28/64
> > > https://lkml.org/lkml/2014/5/28/57
> >
> > > I tested #1 approach and found the problem. Although free memory on
> > > meminfo can move around low watermark, there is large fluctuation on free
> > > memory, because too many pages are reclaimed when kswapd is invoked.
> > > Reason for this behaviour is that successive allocated CMA pages are
> > > on the LRU list in that order and kswapd reclaim them in same order.
> > > These memory doesn't help watermark checking from kwapd, so too many
> > > pages are reclaimed, I guess.
> >
> > This issue can be handle with some change around shrink code. I am
> > trying to integrate a patch for them.
> > But I am not sure we met the same issue. Do you mind give me more
> > info about this part?
>
> I forgot the issue because there is so big time-gap. I need sometime
> to bring issue back to my brain. I will answer it soon after some thinking.
Hello,
Yes, the issue I mentioned before can be handled by modifying the shrink
code. I didn't dive deeply into the problem, so I don't know the details.
What I do know is that there is a large fluctuation in the memory
statistics, and my guess is that it is caused by the order of reclaimable
pages. With the #1 approach, the bulk of CMA pages used for page cache and
the like are linked together on the LRU and get reclaimed all at once,
because reclaimed CMA pages are not counted and the watermark check keeps
failing until normal pages are reclaimed.
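To be concrete, this is roughly the existing behaviour I mean (paraphrased
from __zone_watermark_ok(); not new code):

        /* With CONFIG_CMA, free CMA pages are not counted for an
         * allocation that cannot use CMA, so reclaiming CMA-backed pages
         * does not make this check pass any sooner. */
        if (!(alloc_flags & ALLOC_CMA))
                free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
        /* ... then the usual comparison against min + lowmem_reserve ... */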
I think the round-robin approach is better, for the following reasons
(a rough sketch follows below):
1) We want to spread CMA freepages across all users, not just one specific
user. We can modify the shrink code not to reclaim pages on CMA, because
that doesn't help the watermark check in some cases. In that case, if we
don't use round-robin, one specific user whose mappings happen to be backed
by CMA pages gets all the benefit while the others take all the overhead.
Spreading the pages keeps all users fair.
2) Using CMA freepages first needlessly imposes overhead on the CMA user.
If the system has enough normal freepages, it is better not to touch CMA
at all.
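The sketch I mean is something like this (only an illustration: the
threshold, the toggle and where it hooks into __rmqueue are all made up
here, not the code from the links above):

/* Illustrative only: pick which freelist a MOVABLE request should try
 * first, so CMA pageblocks are used round-robin and only when normal
 * freepages are getting low. */
static int pick_movable_migratetype(struct zone *zone)
{
        unsigned long free_cma = zone_page_state(zone, NR_FREE_CMA_PAGES);
        unsigned long free_all = zone_page_state(zone, NR_FREE_PAGES);
        static unsigned long toggle;    /* made-up round-robin state */

        /* Plenty of normal freepages: leave CMA pageblocks alone. */
        if (free_all - free_cma > high_wmark_pages(zone))
                return MIGRATE_MOVABLE;

        /* Otherwise alternate, so no single user ends up mapped only
         * to CMA pages. */
        return (toggle++ & 1) ? MIGRATE_CMA : MIGRATE_MOVABLE;
}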
Thanks.
Hello,
On Wed, Oct 29, 2014 at 03:43:33PM +0100, Vlastimil Babka wrote:
> On 10/16/2014 10:55 AM, Laura Abbott wrote:
> >On 10/15/2014 8:35 PM, Hui Zhu wrote:
> >
> >It's good to see another proposal to fix CMA utilization. Do you have
> >any data about the success rate of CMA contiguous allocation after
> >this patch series? I played around with a similar approach of using
> >CMA for MIGRATE_MOVABLE allocations and found that although utilization
> >did increase, contiguous allocations failed at a higher rate and were
> >much slower. I see what this series is trying to do with avoiding
> >allocation from CMA pages when a contiguous allocation is progress.
> >My concern is that there would still be problems with contiguous
> >allocation after all the MIGRATE_MOVABLE fallback has happened.
>
> Hi,
>
> did anyone try/suggest the following idea?
>
> - keep CMA as fallback to MOVABLE as is is now, i.e. non-agressive
> - when UNMOVABLE (RECLAIMABLE also?) allocation fails and CMA
> pageblocks have space, don't OOM immediately, but first try to
> migrate some MOVABLE pages to CMA pageblocks, to make space for the
> UNMOVABLE allocation in non-CMA pageblocks
> - this should keep CMA pageblocks free as long as possible and
> useful for CMA allocations, but without restricting the non-MOVABLE
> allocations even though there is free memory (but in CMA pageblocks)
> - the fact that a MOVABLE page could be successfully migrated to CMA
> pageblock, means it was not pinned or otherwise non-migratable, so
> there's a good chance it can be migrated back again if CMA
> pageblocks need to be used by CMA allocation
I suggested exactly the same idea a long time ago.
> - it's more complex, but I guess we have most of the necessary
> infrastructure in compaction already :)
I agree, but it still doesn't solve the reclaim problem (i.e., the VM
doesn't need to reclaim CMA pages when the memory pressure comes from
unmovable pages, yet it does). Of course, we could make the VM aware of
that by introducing a new flag for __isolate_lru_page.
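Something like this is what I have in mind (the flag name and value below
are made up, not an existing isolate mode):

/* Sketch only: reclaim triggered by unmovable-page pressure would pass
 * this mode and skip CMA-backed pages on the LRU. */
#define ISOLATE_SKIP_CMA        ((__force isolate_mode_t)0x40)

static bool should_skip_cma_page(struct page *page, isolate_mode_t mode)
{
        return (mode & ISOLATE_SKIP_CMA) &&
                is_migrate_cma(get_pageblock_migratetype(page));
}
/* __isolate_lru_page() would then return -EBUSY for such pages. */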
However, I'd like to rethink the CMA design from the beginning.
It has made the page allocation logic complicated, even very fragile as we
saw recently, and now we need to add new migration logic like you describe.
On top of that, we need to fix the reclaim path, too.
It makes mm more complicated day by day, even though CMA doesn't do its job
well enough (i.e., big latency and frequent allocation failures), so I
really want to stop the mess from growing.
A long time ago, when I saw Joonsoo's CMA aggressive allocation patchset
(i.e., round-robin allocation between CMA and normal movable pages), it
looked good to me at first glance, but it needs tweaks to the allocation
path and doesn't solve the reclaim path either. Yes, the reclaim path could
be solved by another patch, but I want to solve it all at once.
At that time I suggested a big surgery to Joonsoo offline: let's handle CMA
allocation through movable zone allocation. With that we could keep the
allocation and reclaim paths simple. The catch is that we would have to
make the VM aware of an overlapping MOVABLE zone, meaning some pages in
that zone could also belong to other zones; but I think we already have
logic to handle that, judging from the comment in isolate_freepages, so the
design should work.
One thing you might worry about is bigger CMA allocation latency, because
this makes the CMA memory usage ratio higher than the approach you
mentioned, but nobody can guarantee low latency once memory is fully
utilized anyway.
In addition, we now have the fair zone allocator policy, so round-robin
allocation between zones happens automatically. I believe this is the way
to go.
>
> Thoughts?
> Vlastimil
>
> >Thanks,
> >Laura
> >
>
--
Kind regards,
Minchan Kim
On Tue, Nov 4, 2014 at 3:53 PM, Minchan Kim <[email protected]> wrote:
> Hello,
>
> On Wed, Oct 29, 2014 at 03:43:33PM +0100, Vlastimil Babka wrote:
>> On 10/16/2014 10:55 AM, Laura Abbott wrote:
>> >On 10/15/2014 8:35 PM, Hui Zhu wrote:
>> >
>> >It's good to see another proposal to fix CMA utilization. Do you have
>> >any data about the success rate of CMA contiguous allocation after
>> >this patch series? I played around with a similar approach of using
>> >CMA for MIGRATE_MOVABLE allocations and found that although utilization
>> >did increase, contiguous allocations failed at a higher rate and were
>> >much slower. I see what this series is trying to do with avoiding
>> >allocation from CMA pages when a contiguous allocation is progress.
>> >My concern is that there would still be problems with contiguous
>> >allocation after all the MIGRATE_MOVABLE fallback has happened.
>>
>> Hi,
>>
>> did anyone try/suggest the following idea?
>>
>> - keep CMA as fallback to MOVABLE as is is now, i.e. non-agressive
>> - when UNMOVABLE (RECLAIMABLE also?) allocation fails and CMA
>> pageblocks have space, don't OOM immediately, but first try to
>> migrate some MOVABLE pages to CMA pageblocks, to make space for the
>> UNMOVABLE allocation in non-CMA pageblocks
>> - this should keep CMA pageblocks free as long as possible and
>> useful for CMA allocations, but without restricting the non-MOVABLE
>> allocations even though there is free memory (but in CMA pageblocks)
>> - the fact that a MOVABLE page could be successfully migrated to CMA
>> pageblock, means it was not pinned or otherwise non-migratable, so
>> there's a good chance it can be migrated back again if CMA
>> pageblocks need to be used by CMA allocation
>
> I suggested exactly same idea long time ago.
>
>> - it's more complex, but I guess we have most of the necessary
>> infrastructure in compaction already :)
>
> I agree but still, it doesn't solve reclaim problem(ie, VM doesn't
> need to reclaim CMA pages when memory pressure of unmovable pages
> happens). Of course, we could make VM be aware of that via introducing
> new flag of __isolate_lru_page.
>
> However, I'd like to think CMA design from the beginning.
> It made page allocation logic complicated, even very fragile as we
> had recently and now we need to add new logics to migrate like you said.
> As well, we need to fix reclaim path, too.
>
> It makes mm complicated day by day even though it doesn't do the role
> enough well(ie, big latency and frequent allocation failure) so I really
> want to stop making the mess bloated.
>
> Long time ago, when I saw Joonsoo's CMA agressive allocation patchset
> (ie, roundrobin allocation between CMA and normal movable pages)
> it was good to me at a first glance but it needs tweak of allocation
> path and doesn't solve reclaim path, either. Yes, reclaim path could
> be solved by another patch but I want to solve it altogether.
>
> At that time, I suggested big surgery to Joonsoo in offline that
> let's move CMA allocation with movable zone allocation. With it,
> we could make allocation/reclaim path simple but thing is we should
> make VM be aware of overlapping MOVABLE zone which means some of pages
> in the zone could be part of another zones but I think we already have
> logics to handle it when I read comment in isolate_freepages so I think
> the design should work.
Thanks.
>
> A thing you guys might worry is bigger CMA latency because it makes
> CMA memory usage ratio higher than the approach you mentioned but
> anyone couldn't guarantee it once memory is fully utilized.
> In addition, we have used fair zone allocator policy so it makes
> round robin allocation automatically so I believe it should be way
> to go.
Even if the kernel uses CMA memory for normal allocations, cma_alloc
latency will still show up when most of memory is already allocated and a
driver then asks for CMA memory.
https://lkml.org/lkml/2014/10/17/129
https://lkml.org/lkml/2014/10/17/130
These patches let cma_alloc do a shrink, via the function
shrink_all_memory_for_cma, when needed. That handled a lot of the latency
issues on my side. And I think it could be made more configurable, for
example so that some devices use it and others don't.
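Roughly, the shrink sits in front of the contiguous allocation like this
(only a sketch: the trigger condition and threshold are my simplification;
the helper name comes from the patches above, its exact signature is
assumed):

/* Called from cma_alloc() before trying alloc_contig_range(). */
static void cma_shrink_before_alloc(unsigned long count)
{
        /* If there is clearly not enough free memory for a contiguous
         * request of `count' pages, reclaim roughly that amount up front
         * instead of paying the latency inside the alloc_contig_range()
         * retries. */
        if (global_page_state(NR_FREE_PAGES) < count * 2)
                shrink_all_memory_for_cma(count);
}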
Thanks,
Hui
>
>>
>> Thoughts?
>> Vlastimil
>>
>> >Thanks,
>> >Laura
>> >
>>
>
> --
> Kind regards,
> Minchan Kim
>
On 11/04/2014 08:53 AM, Minchan Kim wrote:
> Hello,
>
> On Wed, Oct 29, 2014 at 03:43:33PM +0100, Vlastimil Babka wrote:
>> On 10/16/2014 10:55 AM, Laura Abbott wrote:
>>
>> Hi,
>>
>> did anyone try/suggest the following idea?
>>
>> - keep CMA as fallback to MOVABLE as is is now, i.e. non-agressive
>> - when UNMOVABLE (RECLAIMABLE also?) allocation fails and CMA
>> pageblocks have space, don't OOM immediately, but first try to
>> migrate some MOVABLE pages to CMA pageblocks, to make space for the
>> UNMOVABLE allocation in non-CMA pageblocks
>> - this should keep CMA pageblocks free as long as possible and
>> useful for CMA allocations, but without restricting the non-MOVABLE
>> allocations even though there is free memory (but in CMA pageblocks)
>> - the fact that a MOVABLE page could be successfully migrated to CMA
>> pageblock, means it was not pinned or otherwise non-migratable, so
>> there's a good chance it can be migrated back again if CMA
>> pageblocks need to be used by CMA allocation
>
> I suggested exactly same idea long time ago.
>
>> - it's more complex, but I guess we have most of the necessary
>> infrastructure in compaction already :)
>
> I agree but still, it doesn't solve reclaim problem(ie, VM doesn't
> need to reclaim CMA pages when memory pressure of unmovable pages
> happens). Of course, we could make VM be aware of that via introducing
> new flag of __isolate_lru_page.
Well, if it reclaims CMA pages, then it has to be followed by migration.
Is that better or worse than breaking LRU assumptions by reclaiming based
on where the page is located? I thought this was basically what lumpy
reclaim did, and it was removed.
> However, I'd like to think CMA design from the beginning.
> It made page allocation logic complicated, even very fragile as we
> had recently and now we need to add new logics to migrate like you said.
> As well, we need to fix reclaim path, too.
>
> It makes mm complicated day by day even though it doesn't do the role
> enough well(ie, big latency and frequent allocation failure) so I really
> want to stop making the mess bloated.
Yeah that would be great.
> Long time ago, when I saw Joonsoo's CMA agressive allocation patchset
> (ie, roundrobin allocation between CMA and normal movable pages)
> it was good to me at a first glance but it needs tweak of allocation
> path and doesn't solve reclaim path, either. Yes, reclaim path could
> be solved by another patch but I want to solve it altogether.
>
> At that time, I suggested big surgery to Joonsoo in offline that
> let's move CMA allocation with movable zone allocation. With it,
> we could make allocation/reclaim path simple but thing is we should
I'm not sure I fully understand this. You want to introduce a movable zone
instead of CMA pageblocks? But how would you size it, or resize it; would
that even be possible?
> make VM be aware of overlapping MOVABLE zone which means some of pages
> in the zone could be part of another zones but I think we already have
> logics to handle it when I read comment in isolate_freepages so I think
> the design should work.
Why would it overlap in the first place? Just because it wouldn't be
sized on pageblock boundary? Or to make (re)sizing simpler? Yeah we
could probably handle that, but it's not completely for free (you
iterate over blocks/pages uselessly).
> A thing you guys might worry is bigger CMA latency because it makes
> CMA memory usage ratio higher than the approach you mentioned but
> anyone couldn't guarantee it once memory is fully utilized.
> In addition, we have used fair zone allocator policy so it makes
> round robin allocation automatically so I believe it should be way
> to go.
Yeah, maybe it could be simpler in the end. Although a new zone type could
be a disturbing change, with some overhead to per-cpu structures etc. The
allocations in that zone would be somewhat at a disadvantage wrt the LRU,
as CMA allocation would mostly reclaim them instead of migrating them away
(assuming there wouldn't be as much spare space for migration as when CMA
pageblocks are part of a much larger zone). But I guess the same could be
said about the DMA zone...
>>
>> Thoughts?
>> Vlastimil
>>
>>> Thanks,
>>> Laura
>>>
>>
>
Hello,
On Tue, Nov 04, 2014 at 10:29:59AM +0100, Vlastimil Babka wrote:
> On 11/04/2014 08:53 AM, Minchan Kim wrote:
> >Hello,
> >
> >On Wed, Oct 29, 2014 at 03:43:33PM +0100, Vlastimil Babka wrote:
> >>On 10/16/2014 10:55 AM, Laura Abbott wrote:
> >>
> >>Hi,
> >>
> >>did anyone try/suggest the following idea?
> >>
> >>- keep CMA as fallback to MOVABLE as is is now, i.e. non-agressive
> >>- when UNMOVABLE (RECLAIMABLE also?) allocation fails and CMA
> >>pageblocks have space, don't OOM immediately, but first try to
> >>migrate some MOVABLE pages to CMA pageblocks, to make space for the
> >>UNMOVABLE allocation in non-CMA pageblocks
> >>- this should keep CMA pageblocks free as long as possible and
> >>useful for CMA allocations, but without restricting the non-MOVABLE
> >>allocations even though there is free memory (but in CMA pageblocks)
> >>- the fact that a MOVABLE page could be successfully migrated to CMA
> >>pageblock, means it was not pinned or otherwise non-migratable, so
> >>there's a good chance it can be migrated back again if CMA
> >>pageblocks need to be used by CMA allocation
> >
> >I suggested exactly same idea long time ago.
> >
> >>- it's more complex, but I guess we have most of the necessary
> >>infrastructure in compaction already :)
> >
> >I agree but still, it doesn't solve reclaim problem(ie, VM doesn't
> >need to reclaim CMA pages when memory pressure of unmovable pages
> >happens). Of course, we could make VM be aware of that via introducing
> >new flag of __isolate_lru_page.
>
> Well, if it relaims CMA pages, then it has to be followed by the
> migration. Is that better or worse than breaking LRU assumptions by
> reclaiming based on where the page is located? I thought this was
> basically what lumpy reclaim did, and it was removed.
It would work, and the cost might be acceptable as part of using CMA,
because CMA can already migrate/discard lots of pages, which hurts the LRU
assumptions anyway. However, I don't think it's optimal.
>
> >However, I'd like to think CMA design from the beginning.
> >It made page allocation logic complicated, even very fragile as we
> >had recently and now we need to add new logics to migrate like you said.
> >As well, we need to fix reclaim path, too.
> >
> >It makes mm complicated day by day even though it doesn't do the role
> >enough well(ie, big latency and frequent allocation failure) so I really
> >want to stop making the mess bloated.
>
> Yeah that would be great.
>
> >Long time ago, when I saw Joonsoo's CMA agressive allocation patchset
> >(ie, roundrobin allocation between CMA and normal movable pages)
> >it was good to me at a first glance but it needs tweak of allocation
> >path and doesn't solve reclaim path, either. Yes, reclaim path could
> >be solved by another patch but I want to solve it altogether.
> >
> >At that time, I suggested big surgery to Joonsoo in offline that
> >let's move CMA allocation with movable zone allocation. With it,
> >we could make allocation/reclaim path simple but thing is we should
>
> I'm not sure I understand enough from this. You want to introduce a
> movable zone instead of CMA pageblocks? But how to size it, resize
> it, would it be possible?
Why do we need to care about resizing?
All CMA pages are reserved via memblock during boot.
If we can set the zone size after that, maybe we don't need to resize the
zone at all.
>
> >make VM be aware of overlapping MOVABLE zone which means some of pages
> >in the zone could be part of another zones but I think we already have
> >logics to handle it when I read comment in isolate_freepages so I think
> >the design should work.
>
> Why would it overlap in the first place? Just because it wouldn't be
> sized on pageblock boundary? Or to make (re)sizing simpler? Yeah we
> could probably handle that, but it's not completely for free (you
> iterate over blocks/pages uselessly).
Pages reserved for CMA are spread over system memory, so zones could
overlap each other, and we would need to check for that overlap, as
pageblock_pfn_to_page does, whenever we walk pfns in order. It's not free,
but it only adds overhead to pfn-order walking such as compaction, which is
not a hot path.
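The kind of check I mean is roughly this (a simplified sketch of what
compaction already does per pageblock):

/* Treat a pageblock as belonging to this zone only if both its first
 * and last pfn are valid and really are in the zone. */
static struct page *block_page_if_in_zone(unsigned long start_pfn,
                                          unsigned long end_pfn,
                                          struct zone *zone)
{
        if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
                return NULL;
        if (page_zone(pfn_to_page(start_pfn)) != zone ||
            page_zone(pfn_to_page(end_pfn)) != zone)
                return NULL;
        return pfn_to_page(start_pfn);
}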
>
> >A thing you guys might worry is bigger CMA latency because it makes
> >CMA memory usage ratio higher than the approach you mentioned but
> >anyone couldn't guarantee it once memory is fully utilized.
> >In addition, we have used fair zone allocator policy so it makes
> >round robin allocation automatically so I believe it should be way
> >to go.
>
> Yeah maybe it could be simpler in the end. Although a new zone type
> could be a disturbing change, with some overhead to per-cpu
> structures etc. The allocations in that zone would be somewhat at
> disadvantage wrt LRU, as CMA allocation would mostly reclaim them
> instead of migrating away (assuming there wouldn't be so much spare
> space for migration as when CMA pageblocks are part of a much larger
> zone). But I guess the same could be said about the DMA zone...
What do you mean by "CMA allocation"?
If you mean movable page allocations such as userspace pages, they would be
spread round-robin by the fair zone policy.
If you mean a device request for contiguous memory, where we must reclaim
CMA pages because there is no spare memory left, then we have no choice.
IOW, that's the trade-off of using CMA.
>
> >>
> >>Thoughts?
> >>Vlastimil
> >>
> >>>Thanks,
> >>>Laura
> >>>
> >>
> >
>
--
Kind regards,
Minchan Kim
On Fri, Oct 24, 2014 at 1:28 PM, Joonsoo Kim <[email protected]> wrote:
> On Thu, Oct 16, 2014 at 11:35:51AM +0800, Hui Zhu wrote:
>> If page alloc function __rmqueue try to get pages from MIGRATE_MOVABLE and
>> conditions (cma_alloc_counter, cma_aggressive_free_min, cma_alloc_counter)
>> allow, MIGRATE_CMA will be allocated as MIGRATE_MOVABLE first.
>>
>> Signed-off-by: Hui Zhu <[email protected]>
>> ---
>> mm/page_alloc.c | 42 +++++++++++++++++++++++++++++++-----------
>> 1 file changed, 31 insertions(+), 11 deletions(-)
>>
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 736d8e1..87bc326 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -65,6 +65,10 @@
>> #include <asm/div64.h>
>> #include "internal.h"
>>
>> +#ifdef CONFIG_CMA_AGGRESSIVE
>> +#include <linux/cma.h>
>> +#endif
>> +
>> /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
>> static DEFINE_MUTEX(pcp_batch_high_lock);
>> #define MIN_PERCPU_PAGELIST_FRACTION (8)
>> @@ -1189,20 +1193,36 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
>> {
>> struct page *page;
>>
>> -retry_reserve:
>> +#ifdef CONFIG_CMA_AGGRESSIVE
>> + if (cma_aggressive_switch
>> + && migratetype == MIGRATE_MOVABLE
>> + && atomic_read(&cma_alloc_counter) == 0
>> + && global_page_state(NR_FREE_CMA_PAGES) > cma_aggressive_free_min
>> + + (1 << order))
>> + migratetype = MIGRATE_CMA;
>> +#endif
>> +retry:
>
> I don't get it why cma_alloc_counter should be tested.
> When cma alloc is progress, pageblock is isolated so that pages on that
> pageblock cannot be allocated. Why should we prevent aggressive
> allocation in this case?
>
Hi Joonsoo,
Even though the pageblock is isolated at the beginning of
alloc_contig_range, it is un-isolated again when alloc_contig_range hits an
error, for example "PFNs busy". cma_alloc will then keep calling
alloc_contig_range with another start address if needed.
So checking cma_alloc_counter reduces the contention between the CMA
allocation in progress in cma_alloc and __rmqueue taking CMA pages at the
same time.
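Schematically, I have something like this in mind (only a sketch; the
candidate-range walk is a placeholder for what cma_alloc really does with
its bitmap):

        atomic_inc(&cma_alloc_counter); /* __rmqueue stops preferring CMA */
        do {
                ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
                if (ret != -EBUSY)
                        break;          /* success, or a hard failure */
                /* "PFNs busy": the range was un-isolated again, so try
                 * the next candidate range (placeholder helper below). */
                pfn = next_candidate_pfn(pfn, count);
        } while (pfn);
        atomic_dec(&cma_alloc_counter);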
Thanks,
Hui
> Thanks.
>