Currently we have to hold the single global cma_mutex when allocating CMA
pages, which is fine when there is only one CMA area in the system.
However, when there are several CMA areas, as on our Android smartphones,
the single cma_mutex prevents concurrent CMA page allocation.

This patch removes the single cma_mutex and uses a per-CMA-area alloc_lock
instead. This allows concurrent CMA page allocation across different CMA
areas while still protecting against concurrent access to the same
pageblocks.
Signed-off-by: Weijie Yang <[email protected]>
---
mm/cma.c | 6 +++---
mm/cma.h | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c
index 3a7a67b..eaf1afe 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -41,7 +41,6 @@
struct cma cma_areas[MAX_CMA_AREAS];
unsigned cma_area_count;
-static DEFINE_MUTEX(cma_mutex);
phys_addr_t cma_get_base(const struct cma *cma)
{
@@ -128,6 +127,7 @@ static int __init cma_activate_area(struct cma *cma)
} while (--i);
mutex_init(&cma->lock);
+ mutex_init(&cma->alloc_lock);
#ifdef CONFIG_CMA_DEBUGFS
INIT_HLIST_HEAD(&cma->mem_head);
@@ -398,9 +398,9 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
mutex_unlock(&cma->lock);
pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
- mutex_lock(&cma_mutex);
+ mutex_lock(&cma->alloc_lock);
ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
- mutex_unlock(&cma_mutex);
+ mutex_unlock(&cma->alloc_lock);
if (ret == 0) {
page = pfn_to_page(pfn);
break;
diff --git a/mm/cma.h b/mm/cma.h
index 1132d73..2084c9f 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -7,6 +7,7 @@ struct cma {
unsigned long *bitmap;
unsigned int order_per_bit; /* Order of pages represented by one bit */
struct mutex lock;
+ struct mutex alloc_lock;
#ifdef CONFIG_CMA_DEBUGFS
struct hlist_head mem_head;
spinlock_t mem_head_lock;
--
1.7.10.4
On 06/05/2015 01:01 AM, Weijie Yang wrote:
> Currently we have to hold the single global cma_mutex when allocating CMA
> pages, which is fine when there is only one CMA area in the system.
> However, when there are several CMA areas, as on our Android smartphones,
> the single cma_mutex prevents concurrent CMA page allocation.
>
> This patch removes the single cma_mutex and uses a per-CMA-area alloc_lock
> instead. This allows concurrent CMA page allocation across different CMA
> areas while still protecting against concurrent access to the same
> pageblocks.
>
> Signed-off-by: Weijie Yang <[email protected]>
Last I knew, alloc_contig_range needed to be serialized, which is why we
still had the global CMA mutex: https://lkml.org/lkml/2014/2/18/462
So NAK unless something has changed to allow this.
Laura
> ---
> mm/cma.c | 6 +++---
> mm/cma.h | 1 +
> 2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/mm/cma.c b/mm/cma.c
> index 3a7a67b..eaf1afe 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -41,7 +41,6 @@
>
> struct cma cma_areas[MAX_CMA_AREAS];
> unsigned cma_area_count;
> -static DEFINE_MUTEX(cma_mutex);
>
> phys_addr_t cma_get_base(const struct cma *cma)
> {
> @@ -128,6 +127,7 @@ static int __init cma_activate_area(struct cma *cma)
> } while (--i);
>
> mutex_init(&cma->lock);
> + mutex_init(&cma->alloc_lock);
>
> #ifdef CONFIG_CMA_DEBUGFS
> INIT_HLIST_HEAD(&cma->mem_head);
> @@ -398,9 +398,9 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
> mutex_unlock(&cma->lock);
>
> pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
> - mutex_lock(&cma_mutex);
> + mutex_lock(&cma->alloc_lock);
> ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
> - mutex_unlock(&cma_mutex);
> + mutex_unlock(&cma->alloc_lock);
> if (ret == 0) {
> page = pfn_to_page(pfn);
> break;
> diff --git a/mm/cma.h b/mm/cma.h
> index 1132d73..2084c9f 100644
> --- a/mm/cma.h
> +++ b/mm/cma.h
> @@ -7,6 +7,7 @@ struct cma {
> unsigned long *bitmap;
> unsigned int order_per_bit; /* Order of pages represented by one bit */
> struct mutex lock;
> + struct mutex alloc_lock;
> #ifdef CONFIG_CMA_DEBUGFS
> struct hlist_head mem_head;
> spinlock_t mem_head_lock;
>
On Fri, Jun 05 2015, Laura Abbott wrote:
> On 06/05/2015 01:01 AM, Weijie Yang wrote:
>> Currently we have to hold the single global cma_mutex when allocating CMA
>> pages, which is fine when there is only one CMA area in the system.
>> However, when there are several CMA areas, as on our Android smartphones,
>> the single cma_mutex prevents concurrent CMA page allocation.
>>
>> This patch removes the single cma_mutex and uses a per-CMA-area alloc_lock
>> instead. This allows concurrent CMA page allocation across different CMA
>> areas while still protecting against concurrent access to the same
>> pageblocks.
>>
>> Signed-off-by: Weijie Yang <[email protected]>
> Last I knew, alloc_contig_range needed to be serialized, which is why we
> still had the global CMA mutex: https://lkml.org/lkml/2014/2/18/462
>
> So NAK unless something has changed to allow this.
This patch should be fine.

The change you’ve pointed to would get rid of any serialisation around
alloc_contig_range, which is dangerous, but since CMA regions are
pageblock-aligned:
/*
* Sanitise input arguments.
* Pages both ends in CMA area could be merged into adjacent unmovable
* migratetype page by page allocator's buddy algorithm. In the case,
* you couldn't get a contiguous memory, which is not what we want.
*/
alignment = max(alignment,
(phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
base = ALIGN(base, alignment);
size = ALIGN(size, alignment);
limit &= ~(alignment - 1);
synchronising allocation in each area should work fine.
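
To make that concrete, here is a rough, untested sketch (not part of the
patch; the helper and its name are invented purely for illustration, built
on struct cma's base_pfn and count fields). Because base and size are both
rounded to pageblock granularity, an overlap test in pageblock units can
never return true for two distinct regions, which is why per-region
serialisation of alloc_contig_range is sufficient:

/*
 * Illustration only: two pageblock-aligned CMA regions can never
 * occupy the same pageblock, so this overlap check (in pageblock
 * units) is always false for two distinct regions.
 */
static bool cma_regions_share_pageblock(const struct cma *a,
                                        const struct cma *b)
{
        unsigned long a_first = a->base_pfn >> pageblock_order;
        unsigned long a_last = (a->base_pfn + a->count - 1) >> pageblock_order;
        unsigned long b_first = b->base_pfn >> pageblock_order;
        unsigned long b_last = (b->base_pfn + b->count - 1) >> pageblock_order;

        return a_first <= b_last && b_first <= a_last;
}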
>> ---
>> mm/cma.c | 6 +++---
>> mm/cma.h | 1 +
>> 2 files changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/mm/cma.c b/mm/cma.c
>> index 3a7a67b..eaf1afe 100644
>> --- a/mm/cma.c
>> +++ b/mm/cma.c
>> @@ -41,7 +41,6 @@
>>
>> struct cma cma_areas[MAX_CMA_AREAS];
>> unsigned cma_area_count;
>> -static DEFINE_MUTEX(cma_mutex);
>>
>> phys_addr_t cma_get_base(const struct cma *cma)
>> {
>> @@ -128,6 +127,7 @@ static int __init cma_activate_area(struct cma *cma)
>> } while (--i);
>>
>> mutex_init(&cma->lock);
Since we now have two mutexes in the structure, rename this one to
bitmap_lock.
>> + mutex_init(&cma->alloc_lock);
>>
>> #ifdef CONFIG_CMA_DEBUGFS
>> INIT_HLIST_HEAD(&cma->mem_head);
>> @@ -398,9 +398,9 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
>> mutex_unlock(&cma->lock);
>>
>> pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
>> - mutex_lock(&cma_mutex);
>> + mutex_lock(&cma->alloc_lock);
>> ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
>> - mutex_unlock(&cma_mutex);
>> + mutex_unlock(&cma->alloc_lock);
>> if (ret == 0) {
>> page = pfn_to_page(pfn);
>> break;
>> diff --git a/mm/cma.h b/mm/cma.h
>> index 1132d73..2084c9f 100644
>> --- a/mm/cma.h
>> +++ b/mm/cma.h
>> @@ -7,6 +7,7 @@ struct cma {
>> unsigned long *bitmap;
>> unsigned int order_per_bit; /* Order of pages represented by one bit */
>> struct mutex lock;
>> + struct mutex alloc_lock;
>> #ifdef CONFIG_CMA_DEBUGFS
>> struct hlist_head mem_head;
>> spinlock_t mem_head_lock;
--
Best regards, _ _
.o. | Liege of Serenely Enlightened Majesty of o' \,=./ `o
..o | Computer Science, Michał “mina86” Nazarewicz (o o)
ooo +--<[email protected]>--<xmpp:[email protected]>--ooO--(_)--Ooo--
On 06/05/2015 10:19 AM, Michał Nazarewicz wrote:
> On Fri, Jun 05 2015, Laura Abbott wrote:
>> On 06/05/2015 01:01 AM, Weijie Yang wrote:
>>> Currently we have to hold the single global cma_mutex when allocating CMA
>>> pages, which is fine when there is only one CMA area in the system.
>>> However, when there are several CMA areas, as on our Android smartphones,
>>> the single cma_mutex prevents concurrent CMA page allocation.
>>>
>>> This patch removes the single cma_mutex and uses a per-CMA-area alloc_lock
>>> instead. This allows concurrent CMA page allocation across different CMA
>>> areas while still protecting against concurrent access to the same
>>> pageblocks.
>>>
>>> Signed-off-by: Weijie Yang <[email protected]>
>
>> Last I knew, alloc_contig_range needed to be serialized, which is why we
>> still had the global CMA mutex: https://lkml.org/lkml/2014/2/18/462
>>
>> So NAK unless something has changed to allow this.
>
> This patch should be fine.
>
> The change you’ve pointed to would get rid of any serialisation around
> alloc_contig_range, which is dangerous, but since CMA regions are
> pageblock-aligned:
>
> /*
> * Sanitise input arguments.
> * Pages both ends in CMA area could be merged into adjacent unmovable
> * migratetype page by page allocator's buddy algorithm. In the case,
> * you couldn't get a contiguous memory, which is not what we want.
> */
> alignment = max(alignment,
> (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
> base = ALIGN(base, alignment);
> size = ALIGN(size, alignment);
> limit &= ~(alignment - 1);
>
> synchronising allocation in each area should work fine.
>
Okay yes, you are correct. I was somehow thinking that different CMA regions
could end up in the same pageblock. This is documented in alloc_contig_range,
but can we put a comment explaining it here too? It seems to come up
every time the locking here is discussed.
Thanks,
Laura
On Fri, Jun 05 2015, Laura Abbott wrote:
> but can we put a comment explaining this here too?
Sounds good to me. I would even document why we have two locks in case
someone decides to merge them. Perhaps something like (haven’t tested
or even compiled):
-------- >8 ------------------------------------------------------------
Subject: cma: allow concurrent allocation for different CMA regions

Currently we have to hold a single global cma_mutex when allocating CMA
pages. When there are multiple CMA regions, this single cma_mutex
prevents concurrent CMA allocation.

This patch replaces the single cma_mutex with a per-CMA-region
alloc_lock. This allows concurrent CMA allocation for different CMA
regions while still protecting against concurrent access to the same
pageblocks.
Signed-off-by: Weijie Yang <[email protected]>
Signed-off-by: Michal Nazarewicz <[email protected]>
[mina86: renamed cma->lock to cma->bitmap_lock, added locks documentation]
---
mm/cma.c | 18 +++++++++---------
mm/cma.h | 16 +++++++++++++++-
2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c
index 3a7a67b..841fe07 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -41,7 +41,6 @@
struct cma cma_areas[MAX_CMA_AREAS];
unsigned cma_area_count;
-static DEFINE_MUTEX(cma_mutex);
phys_addr_t cma_get_base(const struct cma *cma)
{
@@ -89,9 +88,9 @@ static void cma_clear_bitmap(struct cma *cma, unsigned long pfn,
bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
bitmap_count = cma_bitmap_pages_to_bits(cma, count);
- mutex_lock(&cma->lock);
+ mutex_lock(&cma->bitmap_lock);
bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
- mutex_unlock(&cma->lock);
+ mutex_unlock(&cma->bitmap_lock);
}
static int __init cma_activate_area(struct cma *cma)
@@ -127,7 +126,8 @@ static int __init cma_activate_area(struct cma *cma)
init_cma_reserved_pageblock(pfn_to_page(base_pfn));
} while (--i);
- mutex_init(&cma->lock);
+ mutex_init(&cma->alloc_lock);
+ mutex_init(&cma->bitmap_lock);
#ifdef CONFIG_CMA_DEBUGFS
INIT_HLIST_HEAD(&cma->mem_head);
@@ -381,12 +381,12 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
bitmap_count = cma_bitmap_pages_to_bits(cma, count);
for (;;) {
- mutex_lock(&cma->lock);
+ mutex_lock(&cma->bitmap_lock);
bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap,
bitmap_maxno, start, bitmap_count, mask,
offset);
if (bitmap_no >= bitmap_maxno) {
- mutex_unlock(&cma->lock);
+ mutex_unlock(&cma->bitmap_lock);
break;
}
bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
@@ -395,12 +395,12 @@ struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align)
* our exclusive use. If the migration fails we will take the
* lock again and unmark it.
*/
- mutex_unlock(&cma->lock);
+ mutex_unlock(&cma->bitmap_lock);
pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
- mutex_lock(&cma_mutex);
+ mutex_lock(&cma->alloc_lock);
ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA);
- mutex_unlock(&cma_mutex);
+ mutex_unlock(&cma->alloc_lock);
if (ret == 0) {
page = pfn_to_page(pfn);
break;
diff --git a/mm/cma.h b/mm/cma.h
index 1132d73..b585ba1 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -6,7 +6,21 @@ struct cma {
unsigned long count;
unsigned long *bitmap;
unsigned int order_per_bit; /* Order of pages represented by one bit */
- struct mutex lock;
+ /*
+ * alloc_lock protects calls to alloc_contig_range. The function must
+ * not be called concurrently on the same pageblock, which is why it has
+ * to be synchronised. On the other hand, because each CMA region is
+ * pageblock-aligned, we can have per-region alloc_locks since they never
+ * share a pageblock.
+ */
+ struct mutex alloc_lock;
+ /*
+ * As the name suggests, bitmap_lock protects the bitmap. It has to be
+ * a separate lock from alloc_lock so that we can free CMA pages (which
+ * only requires bitmap_lock) while an allocation (which requires
+ * alloc_lock) is still in progress.
+ */
+ struct mutex bitmap_lock;
#ifdef CONFIG_CMA_DEBUGFS
struct hlist_head mem_head;
spinlock_t mem_head_lock;
--
Best regards, _ _
.o. | Liege of Serenely Enlightened Majesty of o' \,=./ `o
..o | Computer Science, Michał “mina86” Nazarewicz (o o)
ooo +--<[email protected]>--<xmpp:[email protected]>--ooO--(_)--Ooo--