2021-04-08 04:00:40

by Roman Gushchin

[permalink] [raw]
Subject: [PATCH v3 4/6] percpu: generalize pcpu_balance_populated()

To prepare for the depopulation of percpu chunks, split out the
populating part of pcpu_balance_populated() into the new
pcpu_grow_populated() (with the intention of adding
pcpu_shrink_populated() in the next commit).

The goal of pcpu_balance_populated() is to determine whether
there is a shortage or an excessive amount of empty percpu pages
and call into the corresponding function.

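pcpu_grow_populated() takes the desired number of pages as an argument
(nr_to_pop). If it creates a new chunk, nr_to_pop is reduced by the
number of pages already populated in that chunk (clamped at zero),
because a new chunk may be created already populated. The population
pass is retried only if nr_to_pop is still non-zero; otherwise an
infinite loop might occur.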

Signed-off-by: Roman Gushchin <[email protected]>
---
mm/percpu.c | 63 +++++++++++++++++++++++++++++++++--------------------
1 file changed, 39 insertions(+), 24 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index 61339b3d9337..e20119668c42 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1979,7 +1979,7 @@ static void pcpu_balance_free(enum pcpu_chunk_type type)
}

/**
- * pcpu_balance_populated - manage the amount of populated pages
+ * pcpu_grow_populated - populate chunk(s) to satisfy atomic allocations
* @type: chunk type
*
* Maintain a certain amount of populated pages to satisfy atomic allocations.
@@ -1988,35 +1988,15 @@ static void pcpu_balance_free(enum pcpu_chunk_type type)
* allocation causes the failure as it is possible that requests can be
* serviced from already backed regions.
*/
-static void pcpu_balance_populated(enum pcpu_chunk_type type)
+static void pcpu_grow_populated(enum pcpu_chunk_type type, int nr_to_pop)
{
/* gfp flags passed to underlying allocators */
const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
struct list_head *pcpu_slot = pcpu_chunk_list(type);
struct pcpu_chunk *chunk;
- int slot, nr_to_pop, ret;
+ int slot, ret;

- /*
- * Ensure there are certain number of free populated pages for
- * atomic allocs. Fill up from the most packed so that atomic
- * allocs don't increase fragmentation. If atomic allocation
- * failed previously, always populate the maximum amount. This
- * should prevent atomic allocs larger than PAGE_SIZE from keeping
- * failing indefinitely; however, large atomic allocs are not
- * something we support properly and can be highly unreliable and
- * inefficient.
- */
retry_pop:
- if (pcpu_atomic_alloc_failed) {
- nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
- /* best effort anyway, don't worry about synchronization */
- pcpu_atomic_alloc_failed = false;
- } else {
- nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
- pcpu_nr_empty_pop_pages[type],
- 0, PCPU_EMPTY_POP_PAGES_HIGH);
- }
-
for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
unsigned int nr_unpop = 0, rs, re;

@@ -2060,12 +2040,47 @@ static void pcpu_balance_populated(enum pcpu_chunk_type type)
if (chunk) {
spin_lock_irq(&pcpu_lock);
pcpu_chunk_relocate(chunk, -1);
+ nr_to_pop = max_t(int, 0, nr_to_pop - chunk->nr_populated);
spin_unlock_irq(&pcpu_lock);
- goto retry_pop;
+ if (nr_to_pop)
+ goto retry_pop;
}
}
}

+/**
+ * pcpu_balance_populated - manage the amount of populated pages
+ * @type: chunk type
+ *
+ * Populate or depopulate chunks to maintain a certain amount
+ * of free pages to satisfy atomic allocations, but not waste
+ * large amounts of memory.
+ */
+static void pcpu_balance_populated(enum pcpu_chunk_type type)
+{
+ int nr_to_pop;
+
+ /*
+ * Ensure there are certain number of free populated pages for
+ * atomic allocs. Fill up from the most packed so that atomic
+ * allocs don't increase fragmentation. If atomic allocation
+ * failed previously, always populate the maximum amount. This
+ * should prevent atomic allocs larger than PAGE_SIZE from keeping
+ * failing indefinitely; however, large atomic allocs are not
+ * something we support properly and can be highly unreliable and
+ * inefficient.
+ */
+ if (pcpu_atomic_alloc_failed) {
+ nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
+ /* best effort anyway, don't worry about synchronization */
+ pcpu_atomic_alloc_failed = false;
+ pcpu_grow_populated(type, nr_to_pop);
+ } else if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_HIGH) {
+ nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH - pcpu_nr_empty_pop_pages[type];
+ pcpu_grow_populated(type, nr_to_pop);
+ }
+}
+
/**
* pcpu_balance_workfn - manage the amount of free chunks and populated pages
* @work: unused
--
2.30.2


2021-04-16 22:07:48

by Dennis Zhou

[permalink] [raw]
Subject: Re: [PATCH v3 4/6] percpu: generalize pcpu_balance_populated()

Hello,

On Wed, Apr 07, 2021 at 08:57:34PM -0700, Roman Gushchin wrote:
> To prepare for the depopulation of percpu chunks, split out the
> populating part of the pcpu_balance_populated() into the new
> pcpu_grow_populated() (with an intention to add
> pcpu_shrink_populated() in the next commit).
>
> The goal of pcpu_balance_populated() is to determine whether
> there is a shortage or an excessive amount of empty percpu pages
> and call into the corresponding function.
>
> pcpu_grow_populated() takes a desired number of pages as an argument
> (nr_to_pop). If it creates a new chunk, nr_to_pop should be updated
> to reflect that the new chunk could be created already populated.
> Otherwise an infinite loop might appear.
>
> Signed-off-by: Roman Gushchin <[email protected]>
> ---
> mm/percpu.c | 63 +++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 39 insertions(+), 24 deletions(-)
>
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 61339b3d9337..e20119668c42 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1979,7 +1979,7 @@ static void pcpu_balance_free(enum pcpu_chunk_type type)
> }
>
> /**
> - * pcpu_balance_populated - manage the amount of populated pages
> + * pcpu_grow_populated - populate chunk(s) to satisfy atomic allocations
> * @type: chunk type
> *
> * Maintain a certain amount of populated pages to satisfy atomic allocations.
> @@ -1988,35 +1988,15 @@ static void pcpu_balance_free(enum pcpu_chunk_type type)
> * allocation causes the failure as it is possible that requests can be
> * serviced from already backed regions.
> */
> -static void pcpu_balance_populated(enum pcpu_chunk_type type)
> +static void pcpu_grow_populated(enum pcpu_chunk_type type, int nr_to_pop)
> {
> /* gfp flags passed to underlying allocators */
> const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
> struct list_head *pcpu_slot = pcpu_chunk_list(type);
> struct pcpu_chunk *chunk;
> - int slot, nr_to_pop, ret;
> + int slot, ret;
>
> - /*
> - * Ensure there are certain number of free populated pages for
> - * atomic allocs. Fill up from the most packed so that atomic
> - * allocs don't increase fragmentation. If atomic allocation
> - * failed previously, always populate the maximum amount. This
> - * should prevent atomic allocs larger than PAGE_SIZE from keeping
> - * failing indefinitely; however, large atomic allocs are not
> - * something we support properly and can be highly unreliable and
> - * inefficient.
> - */
> retry_pop:
> - if (pcpu_atomic_alloc_failed) {
> - nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
> - /* best effort anyway, don't worry about synchronization */
> - pcpu_atomic_alloc_failed = false;
> - } else {
> - nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
> - pcpu_nr_empty_pop_pages[type],
> - 0, PCPU_EMPTY_POP_PAGES_HIGH);
> - }
> -
> for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
> unsigned int nr_unpop = 0, rs, re;
>
> @@ -2060,12 +2040,47 @@ static void pcpu_balance_populated(enum pcpu_chunk_type type)
> if (chunk) {
> spin_lock_irq(&pcpu_lock);
> pcpu_chunk_relocate(chunk, -1);
> + nr_to_pop = max_t(int, 0, nr_to_pop - chunk->nr_populated);
> spin_unlock_irq(&pcpu_lock);
> - goto retry_pop;
> + if (nr_to_pop)
> + goto retry_pop;
> }
> }
> }
>
> +/**
> + * pcpu_balance_populated - manage the amount of populated pages
> + * @type: chunk type
> + *
> + * Populate or depopulate chunks to maintain a certain amount
> + * of free pages to satisfy atomic allocations, but not waste
> + * large amounts of memory.
> + */
> +static void pcpu_balance_populated(enum pcpu_chunk_type type)
> +{
> + int nr_to_pop;
> +
> + /*
> + * Ensure there are certain number of free populated pages for
> + * atomic allocs. Fill up from the most packed so that atomic
> + * allocs don't increase fragmentation. If atomic allocation
> + * failed previously, always populate the maximum amount. This
> + * should prevent atomic allocs larger than PAGE_SIZE from keeping
> + * failing indefinitely; however, large atomic allocs are not
> + * something we support properly and can be highly unreliable and
> + * inefficient.
> + */
> + if (pcpu_atomic_alloc_failed) {
> + nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
> + /* best effort anyway, don't worry about synchronization */
> + pcpu_atomic_alloc_failed = false;
> + pcpu_grow_populated(type, nr_to_pop);
> + } else if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_HIGH) {
> + nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH - pcpu_nr_empty_pop_pages[type];
> + pcpu_grow_populated(type, nr_to_pop);
> + }
> +}
> +
> /**
> * pcpu_balance_workfn - manage the amount of free chunks and populated pages
> * @work: unused
> --
> 2.30.2
>

I've applied this for-5.14.

Thanks,
Dennis