When an atomic pool becomes fully depleted because it is now relied upon
for all non-blocking allocations through the DMA API, allow background
expansion of each pool by a kworker.
When an atomic pool has less than the default size of memory left, kick
off a kworker to dynamically expand the pool in the background. Each
expansion doubles the pool's size, with a single allocation capped at order
MAX_ORDER-1. If memory cannot be allocated at the requested order, smaller
allocation(s) are attempted.
This allows the default size to be kept quite low when one or more of the
atomic pools is not used.
This also allows __dma_atomic_pool_init to return a pointer to the pool
to make initialization cleaner.
Also switch over some node ids to the more appropriate NUMA_NO_NODE.
Signed-off-by: David Rientjes <[email protected]>
---
kernel/dma/pool.c | 120 +++++++++++++++++++++++++++++++---------------
1 file changed, 82 insertions(+), 38 deletions(-)
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 9b806f5eded8..e14c5a2da734 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -11,13 +11,17 @@
#include <linux/genalloc.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
static struct gen_pool *atomic_pool_dma __ro_after_init;
static struct gen_pool *atomic_pool_dma32 __ro_after_init;
static struct gen_pool *atomic_pool_kernel __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
-static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
+static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+/* Dynamic background expansion when the atomic pool is near capacity */
+static struct work_struct atomic_pool_work;
static int __init early_coherent_pool(char *p)
{
@@ -26,76 +30,114 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
-static int __init __dma_atomic_pool_init(struct gen_pool **pool,
- size_t pool_size, gfp_t gfp)
+static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
+ gfp_t gfp)
{
- const unsigned int order = get_order(pool_size);
- const unsigned long nr_pages = pool_size >> PAGE_SHIFT;
+ unsigned int order;
struct page *page;
void *addr;
- int ret;
+ int ret = -ENOMEM;
+
+ /* Cannot allocate larger than MAX_ORDER-1 */
+ order = min(get_order(pool_size), MAX_ORDER-1);
+
+ do {
+ pool_size = 1 << (PAGE_SHIFT + order);
- if (dev_get_cma_area(NULL))
- page = dma_alloc_from_contiguous(NULL, nr_pages, order, false);
- else
- page = alloc_pages(gfp, order);
+ if (dev_get_cma_area(NULL))
+ page = dma_alloc_from_contiguous(NULL, 1 << order,
+ order, false);
+ else
+ page = alloc_pages(gfp, order);
+ } while (!page && order-- > 0);
if (!page)
goto out;
arch_dma_prep_coherent(page, pool_size);
- *pool = gen_pool_create(PAGE_SHIFT, -1);
- if (!*pool)
- goto free_page;
-
addr = dma_common_contiguous_remap(page, pool_size,
pgprot_dmacoherent(PAGE_KERNEL),
__builtin_return_address(0));
if (!addr)
- goto destroy_genpool;
+ goto free_page;
- ret = gen_pool_add_virt(*pool, (unsigned long)addr, page_to_phys(page),
- pool_size, -1);
+ ret = gen_pool_add_virt(pool, (unsigned long)addr, page_to_phys(page),
+ pool_size, NUMA_NO_NODE);
if (ret)
goto remove_mapping;
- gen_pool_set_algo(*pool, gen_pool_first_fit_order_align, NULL);
- pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
- pool_size >> 10, &gfp);
return 0;
remove_mapping:
dma_common_free_remap(addr, pool_size);
-destroy_genpool:
- gen_pool_destroy(*pool);
- *pool = NULL;
free_page:
- if (!dma_release_from_contiguous(NULL, page, nr_pages))
+ if (!dma_release_from_contiguous(NULL, page, 1 << order))
__free_pages(page, order);
out:
- pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
- pool_size >> 10, &gfp);
- return -ENOMEM;
+ return ret;
+}
+
+static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp)
+{
+ if (pool && gen_pool_avail(pool) < atomic_pool_size)
+ atomic_pool_expand(pool, gen_pool_size(pool), gfp);
+}
+
+static void atomic_pool_work_fn(struct work_struct *work)
+{
+ if (IS_ENABLED(CONFIG_ZONE_DMA))
+ atomic_pool_resize(atomic_pool_dma, GFP_DMA);
+ if (IS_ENABLED(CONFIG_ZONE_DMA32))
+ atomic_pool_resize(atomic_pool_dma32, GFP_DMA32);
+ atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL);
+}
+
+static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size,
+ gfp_t gfp)
+{
+ struct gen_pool *pool;
+ int ret;
+
+ pool = gen_pool_create(PAGE_SHIFT, NUMA_NO_NODE);
+ if (!pool)
+ return NULL;
+
+ gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL);
+
+ ret = atomic_pool_expand(pool, pool_size, gfp);
+ if (ret) {
+ gen_pool_destroy(pool);
+ pr_err("DMA: failed to allocate %zu KiB %pGg pool for atomic allocation\n",
+ pool_size >> 10, &gfp);
+ return NULL;
+ }
+
+ pr_info("DMA: preallocated %zu KiB %pGg pool for atomic allocations\n",
+ gen_pool_size(pool) >> 10, &gfp);
+ return pool;
}
static int __init dma_atomic_pool_init(void)
{
int ret = 0;
- int err;
- ret = __dma_atomic_pool_init(&atomic_pool_kernel, atomic_pool_size,
- GFP_KERNEL);
+ INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
+
+ atomic_pool_kernel = __dma_atomic_pool_init(atomic_pool_size,
+ GFP_KERNEL);
+ if (!atomic_pool_kernel)
+ ret = -ENOMEM;
if (IS_ENABLED(CONFIG_ZONE_DMA)) {
- err = __dma_atomic_pool_init(&atomic_pool_dma,
- atomic_pool_size, GFP_DMA);
- if (!ret && err)
- ret = err;
+ atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
+ GFP_DMA);
+ if (!atomic_pool_dma)
+ ret = -ENOMEM;
}
if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
- err = __dma_atomic_pool_init(&atomic_pool_dma32,
- atomic_pool_size, GFP_DMA32);
- if (!ret && err)
- ret = err;
+ atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size,
+ GFP_DMA32);
+ if (!atomic_pool_dma32)
+ ret = -ENOMEM;
}
return ret;
}
@@ -144,6 +186,8 @@ void *dma_alloc_from_pool(struct device *dev, size_t size,
ptr = (void *)val;
memset(ptr, 0, size);
}
+ if (gen_pool_avail(pool) < atomic_pool_size)
+ schedule_work(&atomic_pool_work);
return ptr;
}
On Fri, 10 Apr 2020, Hillf Danton wrote:
>
> On Wed, 8 Apr 2020 14:21:06 -0700 (PDT) David Rientjes wrote:
> >
> > When an atomic pool becomes fully depleted because it is now relied upon
> > for all non-blocking allocations through the DMA API, allow background
> > expansion of each pool by a kworker.
> >
> > When an atomic pool has less than the default size of memory left, kick
> > off a kworker to dynamically expand the pool in the background. The pool
> > is doubled in size, up to MAX_ORDER-1. If memory cannot be allocated at
> > the requested order, smaller allocation(s) are attempted.
> >
> What is proposed looks like a one-way street: it does not take into
> account how to dynamically shrink the pool. Thus there is a risk, in
> corner cases, that pools stay over-expanded in the long run after
> one-off peak allocation requests.
>
To us, this is actually a benefit: we prefer the peak size to be
maintained so that we do not need to dynamically resize the pool later at
the cost of throughput. Genpool also does not have great support for
scavenging and freeing unused chunks.
Perhaps we could enforce a maximum size on the pools just as we allow the
default size to be defined by coherent_pool= on the command line. Our use
case would not set this, however, since we have not seen egregious genpool
sizes as the result of non-blockable DMA allocations (perhaps the drivers
we use just play friendlier and you have seen excessive usage?).
I'll rely on Christoph to determine whether it makes sense to add some
periodic scavenging of the atomic pools, whether that's needed for this to
be merged, or whether we should enforce some maximum pool size.
On Fri, Apr 10, 2020 at 12:37:20PM -0700, David Rientjes wrote:
> I'll rely on Christoph to determine whether it makes sense to add some
> periodic scavenging of the atomic pools, whether that's needed for this to
> be merged, or whether we should enforce some maximum pool size.
I don't really see the point. In fact the only part of the series
I feel uneasy about is the growing of the pools, because it already
adds a fair amount of complexity that we might not need for simple
things, but shrinking really doesn't make any sense. So I'm tempted
to not ever support shrinking, and even make growing optional code under
a new config variable. We'll also need a way to query the current size
through e.g. a debugfs file.
On Tue, 14 Apr 2020, Christoph Hellwig wrote:
> > I'll rely on Christoph to determine whether it makes sense to add some
> > periodic scavenging of the atomic pools, whether that's needed for this to
> > be merged, or whether we should enforce some maximum pool size.
>
> I don't really see the point. In fact the only part of the series
> I feel uneasy about is the growing of the pools, because it already
> adds a fair amount of complexity that we might not need for simple
> things, but shrinking really doesn't make any sense. So I'm tempted
> to not ever support shrinking, and even make growing optional code under
> a new config variable. We'll also need a way to query the current size
> through e.g. a debugfs file.
>
New debugfs file sounds good, I'll add it. If we want to disable dynamic
expansion when the pool is depleted under a new config option, let me
know.