When allocating atomic DMA memory for a device, the dma-pool core
queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
use. It turns out the GFP flag returned is only an optimistic guess.
The pool selected might sometimes live in a zone higher than the
device's view of memory.
As there isn't a way to guarantee a mapping between a device's DMA
constraints and correct GFP flags, this unifies both DMA atomic pools.
The resulting pool is allocated in the lowest DMA zone available, if any,
so that devices always get accessible memory while keeping the
flexibility of using atomic_pool_kernel for the non-constrained ones.
Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask")
Reported-by: Jeremy Linton <[email protected]>
Suggested-by: Robin Murphy <[email protected]>
Signed-off-by: Nicolas Saenz Julienne <[email protected]>
---
kernel/dma/pool.c | 47 +++++++++++++++++++----------------------------
1 file changed, 19 insertions(+), 28 deletions(-)
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 8cfa01243ed2..883f7a583969 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -13,10 +13,11 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
+#define GFP_ATOMIC_POOL_DMA (IS_ENABLED(CONFIG_ZONE_DMA) ? GFP_DMA : \
+ IS_ENABLED(CONFIG_ZONE_DMA32) ? GFP_DMA32 : 0)
+
static struct gen_pool *atomic_pool_dma __ro_after_init;
static unsigned long pool_size_dma;
-static struct gen_pool *atomic_pool_dma32 __ro_after_init;
-static unsigned long pool_size_dma32;
static struct gen_pool *atomic_pool_kernel __ro_after_init;
static unsigned long pool_size_kernel;
@@ -42,16 +43,13 @@ static void __init dma_atomic_pool_debugfs_init(void)
return;
debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma);
- debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32);
debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel);
}
static void dma_atomic_pool_size_add(gfp_t gfp, size_t size)
{
- if (gfp & __GFP_DMA)
+ if (gfp & GFP_ATOMIC_POOL_DMA)
pool_size_dma += size;
- else if (gfp & __GFP_DMA32)
- pool_size_dma32 += size;
else
pool_size_kernel += size;
}
@@ -132,12 +130,11 @@ static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp)
static void atomic_pool_work_fn(struct work_struct *work)
{
- if (IS_ENABLED(CONFIG_ZONE_DMA))
- atomic_pool_resize(atomic_pool_dma,
- GFP_KERNEL | GFP_DMA);
- if (IS_ENABLED(CONFIG_ZONE_DMA32))
- atomic_pool_resize(atomic_pool_dma32,
- GFP_KERNEL | GFP_DMA32);
+ gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA;
+
+ if (dma_gfp)
+ atomic_pool_resize(atomic_pool_dma, GFP_KERNEL | dma_gfp);
+
atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL);
}
@@ -168,6 +165,7 @@ static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size,
static int __init dma_atomic_pool_init(void)
{
+ gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA;
int ret = 0;
/*
@@ -185,18 +183,13 @@ static int __init dma_atomic_pool_init(void)
GFP_KERNEL);
if (!atomic_pool_kernel)
ret = -ENOMEM;
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+
+ if (dma_gfp) {
atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
- GFP_KERNEL | GFP_DMA);
+ GFP_KERNEL | dma_gfp);
if (!atomic_pool_dma)
ret = -ENOMEM;
}
- if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
- atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size,
- GFP_KERNEL | GFP_DMA32);
- if (!atomic_pool_dma32)
- ret = -ENOMEM;
- }
dma_atomic_pool_debugfs_init();
return ret;
@@ -206,14 +199,12 @@ postcore_initcall(dma_atomic_pool_init);
static inline struct gen_pool *dev_to_pool(struct device *dev)
{
u64 phys_mask;
- gfp_t gfp;
-
- gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
- &phys_mask);
- if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
- return atomic_pool_dma;
- if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
- return atomic_pool_dma32;
+
+ if (atomic_pool_dma &&
+ dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
+ &phys_mask))
+ return atomic_pool_dma;
+
return atomic_pool_kernel;
}
--
2.27.0
Hi,
I spun this up on my 8G model using the PFTF firmware from:
https://github.com/pftf/RPi4/releases
Which allows me to switch between ACPI/DT on the machine. In DT mode it
works fine now, but with ACPI I continue to have failures unless I
disable CMA via cma=0 on the kernel command line. I think that is because
using DT:
[ 0.000000] Reserved memory: created CMA memory pool at
0x0000000037400000, size 64 MiB
using ACPI:
[ 0.000000] cma: Reserved 64 MiB at 0x00000000f8000000
Which is AFAIK because the default arm64 CMA allocation is just below
the arm64_dma32_phys_limit.
Thanks,
On 7/7/20 7:28 AM, Nicolas Saenz Julienne wrote:
> When allocating atomic DMA memory for a device, the dma-pool core
> queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
> use. It turns out the GFP flag returned is only an optimistic guess.
> The pool selected might sometimes live in a zone higher than the
> device's view of memory.
>
> As there isn't a way to grantee a mapping between a device's DMA
> constraints and correct GFP flags this unifies both DMA atomic pools.
> The resulting pool is allocated in the lower DMA zone available, if any,
> so as for devices to always get accessible memory while having the
> flexibility of using dma_pool_kernel for the non constrained ones.
>
> Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask")
> Reported-by: Jeremy Linton <[email protected]>
> Suggested-by: Robin Murphy <[email protected]>
> Signed-off-by: Nicolas Saenz Julienne <[email protected]>
> ---
> kernel/dma/pool.c | 47 +++++++++++++++++++----------------------------
> 1 file changed, 19 insertions(+), 28 deletions(-)
>
> diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> index 8cfa01243ed2..883f7a583969 100644
> --- a/kernel/dma/pool.c
> +++ b/kernel/dma/pool.c
> @@ -13,10 +13,11 @@
> #include <linux/slab.h>
> #include <linux/workqueue.h>
>
> +#define GFP_ATOMIC_POOL_DMA (IS_ENABLED(CONFIG_ZONE_DMA) ? GFP_DMA : \
> + IS_ENABLED(CONFIG_ZONE_DMA32) ? GFP_DMA32 : 0)
> +
> static struct gen_pool *atomic_pool_dma __ro_after_init;
> static unsigned long pool_size_dma;
> -static struct gen_pool *atomic_pool_dma32 __ro_after_init;
> -static unsigned long pool_size_dma32;
> static struct gen_pool *atomic_pool_kernel __ro_after_init;
> static unsigned long pool_size_kernel;
>
> @@ -42,16 +43,13 @@ static void __init dma_atomic_pool_debugfs_init(void)
> return;
>
> debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma);
> - debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32);
> debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel);
> }
>
> static void dma_atomic_pool_size_add(gfp_t gfp, size_t size)
> {
> - if (gfp & __GFP_DMA)
> + if (gfp & GFP_ATOMIC_POOL_DMA)
> pool_size_dma += size;
> - else if (gfp & __GFP_DMA32)
> - pool_size_dma32 += size;
> else
> pool_size_kernel += size;
> }
> @@ -132,12 +130,11 @@ static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp)
>
> static void atomic_pool_work_fn(struct work_struct *work)
> {
> - if (IS_ENABLED(CONFIG_ZONE_DMA))
> - atomic_pool_resize(atomic_pool_dma,
> - GFP_KERNEL | GFP_DMA);
> - if (IS_ENABLED(CONFIG_ZONE_DMA32))
> - atomic_pool_resize(atomic_pool_dma32,
> - GFP_KERNEL | GFP_DMA32);
> + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA;
> +
> + if (dma_gfp)
> + atomic_pool_resize(atomic_pool_dma, GFP_KERNEL | dma_gfp);
> +
> atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL);
> }
>
> @@ -168,6 +165,7 @@ static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size,
>
> static int __init dma_atomic_pool_init(void)
> {
> + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA;
> int ret = 0;
>
> /*
> @@ -185,18 +183,13 @@ static int __init dma_atomic_pool_init(void)
> GFP_KERNEL);
> if (!atomic_pool_kernel)
> ret = -ENOMEM;
> - if (IS_ENABLED(CONFIG_ZONE_DMA)) {
> +
> + if (dma_gfp) {
> atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
> - GFP_KERNEL | GFP_DMA);
> + GFP_KERNEL | dma_gfp);
> if (!atomic_pool_dma)
> ret = -ENOMEM;
> }
> - if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
> - atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size,
> - GFP_KERNEL | GFP_DMA32);
> - if (!atomic_pool_dma32)
> - ret = -ENOMEM;
> - }
>
> dma_atomic_pool_debugfs_init();
> return ret;
> @@ -206,14 +199,12 @@ postcore_initcall(dma_atomic_pool_init);
> static inline struct gen_pool *dev_to_pool(struct device *dev)
> {
> u64 phys_mask;
> - gfp_t gfp;
> -
> - gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
> - &phys_mask);
> - if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
> - return atomic_pool_dma;
> - if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
> - return atomic_pool_dma32;
> +
> + if (atomic_pool_dma &&
> + dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
> + &phys_mask))
> + return atomic_pool_dma;
> +
> return atomic_pool_kernel;
> }
>
>
Hi Jim,
On Tue, 2020-07-07 at 17:08 -0500, Jeremy Linton wrote:
> Hi,
>
> I spun this up on my 8G model using the PFTF firmware from:
>
> https://github.com/pftf/RPi4/releases
>
> Which allows me to switch between ACPI/DT on the machine. In DT mode it
> works fine now,
Nice, would that count as a Tested-by from you?
> but with ACPI I continue to have failures unless I
> disable CMA via cma=0 on the kernel command line.
Yes, I see why, in atomic_pool_expand() memory is allocated from CMA without
checking its correctness. That calls for a separate fix. I'll try to think of
something.
> It think that is because
>
> using DT:
>
> [ 0.000000] Reserved memory: created CMA memory pool at
> 0x0000000037400000, size 64 MiB
>
>
> using ACPI:
> [ 0.000000] cma: Reserved 64 MiB at 0x00000000f8000000
>
> Which is AFAIK because the default arm64 CMA allocation is just below
> the arm64_dma32_phys_limit.
As I'm sure you know, we fix the CMA address through DT, isn't that possible
through ACPI?
Regards,
Nicolas
Hi,
On 7/8/20 5:35 AM, Nicolas Saenz Julienne wrote:
> Hi Jim,
>
> On Tue, 2020-07-07 at 17:08 -0500, Jeremy Linton wrote:
>> Hi,
>>
>> I spun this up on my 8G model using the PFTF firmware from:
>>
>> https://github.com/pftf/RPi4/releases
>>
>> Which allows me to switch between ACPI/DT on the machine. In DT mode it
>> works fine now,
>
> Nice, would that count as a Tested-by from you?
If it worked... :)
>
>> but with ACPI I continue to have failures unless I
>> disable CMA via cma=0 on the kernel command line.
>
> Yes, I see why, in atomic_pool_expand() memory is allocated from CMA without
> checking its correctness. That calls for a separate fix. I'll try to think of
> something.
>
>> It think that is because
>>
>> using DT:
>>
>> [ 0.000000] Reserved memory: created CMA memory pool at
>> 0x0000000037400000, size 64 MiB
>>
>>
>> using ACPI:
>> [ 0.000000] cma: Reserved 64 MiB at 0x00000000f8000000
>>
>> Which is AFAIK because the default arm64 CMA allocation is just below
>> the arm64_dma32_phys_limit.
>
> As I'm sure you know, we fix the CMA address trough DT, isn't that possible
> trough ACPI?
Well, there isn't a Linux-specific CMA location property in ACPI. There
are various ways to infer the information, like looking for the lowest
_DMA() range and using that to lower the arm64_dma32_phys_limit. OTOH,
as it stands I don't think that information is available early enough to
set up the CMA pool.
But as you mention, the atomic pool code is allocating from CMA under the
assumption that it's going to be below the GFP_DMA range, which might not
be generally true (due to lack of DT cma properties too?).
On Tue, Jul 07, 2020 at 02:28:04PM +0200, Nicolas Saenz Julienne wrote:
> When allocating atomic DMA memory for a device, the dma-pool core
> queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
> use. It turns out the GFP flag returned is only an optimistic guess.
> The pool selected might sometimes live in a zone higher than the
> device's view of memory.
>
> As there isn't a way to grantee a mapping between a device's DMA
> constraints and correct GFP flags this unifies both DMA atomic pools.
> The resulting pool is allocated in the lower DMA zone available, if any,
> so as for devices to always get accessible memory while having the
> flexibility of using dma_pool_kernel for the non constrained ones.
>
> Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask")
> Reported-by: Jeremy Linton <[email protected]>
> Suggested-by: Robin Murphy <[email protected]>
> Signed-off-by: Nicolas Saenz Julienne <[email protected]>
Hmm, this is not what I expected from the previous thread. I thought
we'd just use one dma pool based on runtime availability of the zones.
On Wed, Jul 08, 2020 at 12:35:34PM +0200, Nicolas Saenz Julienne wrote:
> > Which allows me to switch between ACPI/DT on the machine. In DT mode it
> > works fine now,
>
> Nice, would that count as a Tested-by from you?
>
> > but with ACPI I continue to have failures unless I
> > disable CMA via cma=0 on the kernel command line.
>
> Yes, I see why, in atomic_pool_expand() memory is allocated from CMA without
> checking its correctness. That calls for a separate fix. I'll try to think of
> something.
I think we need a dma_coherent_ok for the allocations from the
pool and then fall back to the next better one to get started. And
yes, CMA is a bit of a mess, that generally needs better checks.
On Wed, 2020-07-08 at 17:35 +0200, Christoph Hellwig wrote:
> On Tue, Jul 07, 2020 at 02:28:04PM +0200, Nicolas Saenz Julienne wrote:
> > When allocating atomic DMA memory for a device, the dma-pool core
> > queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
> > use. It turns out the GFP flag returned is only an optimistic guess.
> > The pool selected might sometimes live in a zone higher than the
> > device's view of memory.
> >
> > As there isn't a way to grantee a mapping between a device's DMA
> > constraints and correct GFP flags this unifies both DMA atomic pools.
> > The resulting pool is allocated in the lower DMA zone available, if any,
> > so as for devices to always get accessible memory while having the
> > flexibility of using dma_pool_kernel for the non constrained ones.
> >
> > Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp
> > mask")
> > Reported-by: Jeremy Linton <[email protected]>
> > Suggested-by: Robin Murphy <[email protected]>
> > Signed-off-by: Nicolas Saenz Julienne <[email protected]>
>
> Hmm, this is not what I expected from the previous thread. I thought
> we'd just use one dma pool based on runtime available of the zones..
I may be misunderstanding you, but isn't that going back to how things used to
be before pulling in David Rientjes' work? The benefit of having a GFP_KERNEL
pool is that non-address-constrained devices can get their atomic memory there,
instead of consuming somewhat scarcer low memory.
On Wed, Jul 08, 2020 at 06:00:35PM +0200, Nicolas Saenz Julienne wrote:
> On Wed, 2020-07-08 at 17:35 +0200, Christoph Hellwig wrote:
> > On Tue, Jul 07, 2020 at 02:28:04PM +0200, Nicolas Saenz Julienne wrote:
> > > When allocating atomic DMA memory for a device, the dma-pool core
> > > queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
> > > use. It turns out the GFP flag returned is only an optimistic guess.
> > > The pool selected might sometimes live in a zone higher than the
> > > device's view of memory.
> > >
> > > As there isn't a way to grantee a mapping between a device's DMA
> > > constraints and correct GFP flags this unifies both DMA atomic pools.
> > > The resulting pool is allocated in the lower DMA zone available, if any,
> > > so as for devices to always get accessible memory while having the
> > > flexibility of using dma_pool_kernel for the non constrained ones.
> > >
> > > Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp
> > > mask")
> > > Reported-by: Jeremy Linton <[email protected]>
> > > Suggested-by: Robin Murphy <[email protected]>
> > > Signed-off-by: Nicolas Saenz Julienne <[email protected]>
> >
> > Hmm, this is not what I expected from the previous thread. I thought
> > we'd just use one dma pool based on runtime available of the zones..
>
> I may be misunderstanding you, but isn't that going back to how things used to
> be before pulling in David Rientjes' work? The benefit of having a GFP_KERNEL
> pool is that non-address-constrained devices can get their atomic memory there,
> instead of consuming somewhat scarcer low memory.
Yes, I think we are misunderstanding each other. I don't want to remove
any pool, just make better runtime decisions when to use them.
On 2020-07-08 16:36, Christoph Hellwig wrote:
> On Wed, Jul 08, 2020 at 12:35:34PM +0200, Nicolas Saenz Julienne wrote:
>>> Which allows me to switch between ACPI/DT on the machine. In DT mode it
>>> works fine now,
>>
>> Nice, would that count as a Tested-by from you?
>>
>>> but with ACPI I continue to have failures unless I
>>> disable CMA via cma=0 on the kernel command line.
>>
>> Yes, I see why, in atomic_pool_expand() memory is allocated from CMA without
>> checking its correctness. That calls for a separate fix. I'll try to think of
>> something.
>
> I think we need a dma_coherent_ok for the allocations from the
> pool and then fall back to the next better one to get started. And
> yes, CMA is a bit of a mess, that generally needs better checks.
Yeah, another thought that came to mind later is that iommu-dma can use
pages from any pool regardless of the device's DMA mask, so we could
stand to be a lot less restrictive in that case too.
Perhaps it is better to just bite the bullet, keep the straightforward
one-pool-per-zone setup, and implement the dma_coherent_ok() type
fallback logic. More complexity for dma_alloc_from_pool(), but
everything else stays nice and simple - lose the assumption that
dev_to_pool() can work for this and just let callers pass an
allocation mask directly, and have dma_free_from_pool() simply check all
available pools.
Robin.
Hi,
On 7/7/20 7:28 AM, Nicolas Saenz Julienne wrote:
> When allocating atomic DMA memory for a device, the dma-pool core
> queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
> use. It turns out the GFP flag returned is only an optimistic guess.
> The pool selected might sometimes live in a zone higher than the
> device's view of memory.
>
> As there isn't a way to grantee a mapping between a device's DMA
> constraints and correct GFP flags this unifies both DMA atomic pools.
> The resulting pool is allocated in the lower DMA zone available, if any,
> so as for devices to always get accessible memory while having the
> flexibility of using dma_pool_kernel for the non constrained ones.
With the follow-on patch "dma-pool: Do not allocate pool memory from
CMA" everything seems to be working fine now.
Tested-by: Jeremy Linton <[email protected]>
Thanks for fixing this!
>
> Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask")
> Reported-by: Jeremy Linton <[email protected]>
> Suggested-by: Robin Murphy <[email protected]>
> Signed-off-by: Nicolas Saenz Julienne <[email protected]>
> ---
> kernel/dma/pool.c | 47 +++++++++++++++++++----------------------------
> 1 file changed, 19 insertions(+), 28 deletions(-)
>
> diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> index 8cfa01243ed2..883f7a583969 100644
> --- a/kernel/dma/pool.c
> +++ b/kernel/dma/pool.c
> @@ -13,10 +13,11 @@
> #include <linux/slab.h>
> #include <linux/workqueue.h>
>
> +#define GFP_ATOMIC_POOL_DMA (IS_ENABLED(CONFIG_ZONE_DMA) ? GFP_DMA : \
> + IS_ENABLED(CONFIG_ZONE_DMA32) ? GFP_DMA32 : 0)
> +
> static struct gen_pool *atomic_pool_dma __ro_after_init;
> static unsigned long pool_size_dma;
> -static struct gen_pool *atomic_pool_dma32 __ro_after_init;
> -static unsigned long pool_size_dma32;
> static struct gen_pool *atomic_pool_kernel __ro_after_init;
> static unsigned long pool_size_kernel;
>
> @@ -42,16 +43,13 @@ static void __init dma_atomic_pool_debugfs_init(void)
> return;
>
> debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma);
> - debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32);
> debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel);
> }
>
> static void dma_atomic_pool_size_add(gfp_t gfp, size_t size)
> {
> - if (gfp & __GFP_DMA)
> + if (gfp & GFP_ATOMIC_POOL_DMA)
> pool_size_dma += size;
> - else if (gfp & __GFP_DMA32)
> - pool_size_dma32 += size;
> else
> pool_size_kernel += size;
> }
> @@ -132,12 +130,11 @@ static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp)
>
> static void atomic_pool_work_fn(struct work_struct *work)
> {
> - if (IS_ENABLED(CONFIG_ZONE_DMA))
> - atomic_pool_resize(atomic_pool_dma,
> - GFP_KERNEL | GFP_DMA);
> - if (IS_ENABLED(CONFIG_ZONE_DMA32))
> - atomic_pool_resize(atomic_pool_dma32,
> - GFP_KERNEL | GFP_DMA32);
> + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA;
> +
> + if (dma_gfp)
> + atomic_pool_resize(atomic_pool_dma, GFP_KERNEL | dma_gfp);
> +
> atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL);
> }
>
> @@ -168,6 +165,7 @@ static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size,
>
> static int __init dma_atomic_pool_init(void)
> {
> + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA;
> int ret = 0;
>
> /*
> @@ -185,18 +183,13 @@ static int __init dma_atomic_pool_init(void)
> GFP_KERNEL);
> if (!atomic_pool_kernel)
> ret = -ENOMEM;
> - if (IS_ENABLED(CONFIG_ZONE_DMA)) {
> +
> + if (dma_gfp) {
> atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
> - GFP_KERNEL | GFP_DMA);
> + GFP_KERNEL | dma_gfp);
> if (!atomic_pool_dma)
> ret = -ENOMEM;
> }
> - if (IS_ENABLED(CONFIG_ZONE_DMA32)) {
> - atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size,
> - GFP_KERNEL | GFP_DMA32);
> - if (!atomic_pool_dma32)
> - ret = -ENOMEM;
> - }
>
> dma_atomic_pool_debugfs_init();
> return ret;
> @@ -206,14 +199,12 @@ postcore_initcall(dma_atomic_pool_init);
> static inline struct gen_pool *dev_to_pool(struct device *dev)
> {
> u64 phys_mask;
> - gfp_t gfp;
> -
> - gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
> - &phys_mask);
> - if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
> - return atomic_pool_dma;
> - if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
> - return atomic_pool_dma32;
> +
> + if (atomic_pool_dma &&
> + dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
> + &phys_mask))
> + return atomic_pool_dma;
> +
> return atomic_pool_kernel;
> }
>
>
On Wed, 8 Jul 2020, Christoph Hellwig wrote:
> On Wed, Jul 08, 2020 at 06:00:35PM +0200, Nicolas Saenz Julienne wrote:
> > On Wed, 2020-07-08 at 17:35 +0200, Christoph Hellwig wrote:
> > > On Tue, Jul 07, 2020 at 02:28:04PM +0200, Nicolas Saenz Julienne wrote:
> > > > When allocating atomic DMA memory for a device, the dma-pool core
> > > > queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
> > > > use. It turns out the GFP flag returned is only an optimistic guess.
> > > > The pool selected might sometimes live in a zone higher than the
> > > > device's view of memory.
> > > >
> > > > As there isn't a way to grantee a mapping between a device's DMA
> > > > constraints and correct GFP flags this unifies both DMA atomic pools.
> > > > The resulting pool is allocated in the lower DMA zone available, if any,
> > > > so as for devices to always get accessible memory while having the
> > > > flexibility of using dma_pool_kernel for the non constrained ones.
> > > >
> > > > Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp
> > > > mask")
> > > > Reported-by: Jeremy Linton <[email protected]>
> > > > Suggested-by: Robin Murphy <[email protected]>
> > > > Signed-off-by: Nicolas Saenz Julienne <[email protected]>
> > >
> > > Hmm, this is not what I expected from the previous thread. I thought
> > > we'd just use one dma pool based on runtime available of the zones..
> >
> > I may be misunderstanding you, but isn't that going back to how things used to
> > be before pulling in David Rientjes' work? The benefit of having a GFP_KERNEL
> > pool is that non-address-constrained devices can get their atomic memory there,
> > instead of consuming somewhat scarcer low memory.
>
> Yes, I think we are misunderstanding each other. I don't want to remove
> any pool, just make better runtime decisions when to use them.
>
Just to be extra explicit for the record and for my own understanding:
Nicolas, your patch series "dma-pool: Fix atomic pool selection" obsoletes
this patch, right? :)
On Thu, 2020-07-09 at 14:49 -0700, David Rientjes wrote:
> On Wed, 8 Jul 2020, Christoph Hellwig wrote:
>
> > On Wed, Jul 08, 2020 at 06:00:35PM +0200, Nicolas Saenz Julienne wrote:
> > > On Wed, 2020-07-08 at 17:35 +0200, Christoph Hellwig wrote:
> > > > On Tue, Jul 07, 2020 at 02:28:04PM +0200, Nicolas Saenz Julienne wrote:
> > > > > When allocating atomic DMA memory for a device, the dma-pool core
> > > > > queries __dma_direct_optimal_gfp_mask() to check which atomic pool to
> > > > > use. It turns out the GFP flag returned is only an optimistic guess.
> > > > > The pool selected might sometimes live in a zone higher than the
> > > > > device's view of memory.
> > > > >
> > > > > As there isn't a way to grantee a mapping between a device's DMA
> > > > > constraints and correct GFP flags this unifies both DMA atomic pools.
> > > > > The resulting pool is allocated in the lower DMA zone available, if
> > > > > any,
> > > > > so as for devices to always get accessible memory while having the
> > > > > flexibility of using dma_pool_kernel for the non constrained ones.
> > > > >
> > > > > Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map
> > > > > to gfp
> > > > > mask")
> > > > > Reported-by: Jeremy Linton <[email protected]>
> > > > > Suggested-by: Robin Murphy <[email protected]>
> > > > > Signed-off-by: Nicolas Saenz Julienne <[email protected]>
> > > >
> > > > Hmm, this is not what I expected from the previous thread. I thought
> > > > we'd just use one dma pool based on runtime available of the zones..
> > >
> > > I may be misunderstanding you, but isn't that going back to how things
> > > used to
> > > be before pulling in David Rientjes' work? The benefit of having a
> > > GFP_KERNEL
> > > pool is that non-address-constrained devices can get their atomic memory
> > > there,
> > > instead of consuming somewhat scarcer low memory.
> >
> > Yes, I think we are misunderstanding each other. I don't want to remove
> > any pool, just make better runtime decisions when to use them.
> >
>
> Just to be extra explicit for the record and for my own understanding:
> Nicolas, your patch series "dma-pool: Fix atomic pool selection" obsoletes
> this patch, right? :)
Yes, that's right.
Regards,
Nicolas