Now that we have an explanation to Amir's issue, we can re-spin this
series.
---
Changes since v2:
- Go back to v1's behavior for patch #2
Changes since v1:
- Make cma_in_zone() more strict, GFP_KERNEL doesn't default to true
now
- Check if phys_addr_ok() exists prior calling it
Christoph Hellwig (1):
dma-pool: fix coherent pool allocations for IOMMU mappings
Nicolas Saenz Julienne (1):
dma-pool: Only allocate from CMA when in same memory zone
drivers/iommu/dma-iommu.c | 4 +-
include/linux/dma-direct.h | 3 -
include/linux/dma-mapping.h | 5 +-
kernel/dma/direct.c | 13 +++-
kernel/dma/pool.c | 145 +++++++++++++++++++-----------------
5 files changed, 92 insertions(+), 78 deletions(-)
--
2.28.0
From: Christoph Hellwig <[email protected]>
When allocating coherent pool memory for an IOMMU mapping we don't care
about the DMA mask. Move the guess for the initial GFP mask into the
dma_direct_alloc_pages and pass dma_coherent_ok as a function pointer
argument so that it doesn't get applied to the IOMMU case.
Signed-off-by: Christoph Hellwig <[email protected]>
---
Changes since v1:
- Check if phys_addr_ok() exists prior calling it
drivers/iommu/dma-iommu.c | 4 +-
include/linux/dma-direct.h | 3 -
include/linux/dma-mapping.h | 5 +-
kernel/dma/direct.c | 13 ++--
kernel/dma/pool.c | 114 +++++++++++++++---------------------
5 files changed, 62 insertions(+), 77 deletions(-)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4959f5df21bd..5141d49a046b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1035,8 +1035,8 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
!gfpflags_allow_blocking(gfp) && !coherent)
- cpu_addr = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page,
- gfp);
+ page = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &cpu_addr,
+ gfp, NULL);
else
cpu_addr = iommu_dma_alloc_pages(dev, size, &page, gfp, attrs);
if (!cpu_addr)
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 5a3ce2a24794..6e87225600ae 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -73,9 +73,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size,
}
u64 dma_direct_get_required_mask(struct device *dev);
-gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
- u64 *phys_mask);
-bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size);
void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t gfp, unsigned long attrs);
void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 016b96b384bd..52635e91143b 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -522,8 +522,9 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
pgprot_t prot, const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size);
-void *dma_alloc_from_pool(struct device *dev, size_t size,
- struct page **ret_page, gfp_t flags);
+struct page *dma_alloc_from_pool(struct device *dev, size_t size,
+ void **cpu_addr, gfp_t flags,
+ bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t));
bool dma_free_from_pool(struct device *dev, void *start, size_t size);
int
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index bb0041e99659..db6ef07aec3b 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -43,7 +43,7 @@ u64 dma_direct_get_required_mask(struct device *dev)
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
-gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
+static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
u64 *phys_limit)
{
u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
@@ -68,7 +68,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
return 0;
}
-bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
+static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
return phys_to_dma_direct(dev, phys) + size - 1 <=
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
@@ -161,8 +161,13 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
size = PAGE_ALIGN(size);
if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
- ret = dma_alloc_from_pool(dev, size, &page, gfp);
- if (!ret)
+ u64 phys_mask;
+
+ gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
+ &phys_mask);
+ page = dma_alloc_from_pool(dev, size, &ret, gfp,
+ dma_coherent_ok);
+ if (!page)
return NULL;
goto done;
}
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 6bc74a2d5127..5d071d4a3cba 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -196,93 +196,75 @@ static int __init dma_atomic_pool_init(void)
}
postcore_initcall(dma_atomic_pool_init);
-static inline struct gen_pool *dma_guess_pool_from_device(struct device *dev)
+static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
{
- u64 phys_mask;
- gfp_t gfp;
-
- gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
- &phys_mask);
- if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
+ if (prev == NULL) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
+ return atomic_pool_dma32;
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
+ return atomic_pool_dma;
+ return atomic_pool_kernel;
+ }
+ if (prev == atomic_pool_kernel)
+ return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma;
+ if (prev == atomic_pool_dma32)
return atomic_pool_dma;
- if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
- return atomic_pool_dma32;
- return atomic_pool_kernel;
+ return NULL;
}
-static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool)
+static struct page *__dma_alloc_from_pool(struct device *dev, size_t size,
+ struct gen_pool *pool, void **cpu_addr,
+ bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
{
- if (bad_pool == atomic_pool_kernel)
- return atomic_pool_dma32 ? : atomic_pool_dma;
+ unsigned long addr;
+ phys_addr_t phys;
- if (bad_pool == atomic_pool_dma32)
- return atomic_pool_dma;
+ addr = gen_pool_alloc(pool, size);
+ if (!addr)
+ return NULL;
- return NULL;
-}
+ phys = gen_pool_virt_to_phys(pool, addr);
+ if (phys_addr_ok && !phys_addr_ok(dev, phys, size)) {
+ gen_pool_free(pool, addr, size);
+ return NULL;
+ }
-static inline struct gen_pool *dma_guess_pool(struct device *dev,
- struct gen_pool *bad_pool)
-{
- if (bad_pool)
- return dma_get_safer_pool(bad_pool);
+ if (gen_pool_avail(pool) < atomic_pool_size)
+ schedule_work(&atomic_pool_work);
- return dma_guess_pool_from_device(dev);
+ *cpu_addr = (void *)addr;
+ memset(*cpu_addr, 0, size);
+ return pfn_to_page(__phys_to_pfn(phys));
}
-void *dma_alloc_from_pool(struct device *dev, size_t size,
- struct page **ret_page, gfp_t flags)
+struct page *dma_alloc_from_pool(struct device *dev, size_t size,
+ void **cpu_addr, gfp_t gfp,
+ bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
{
struct gen_pool *pool = NULL;
- unsigned long val = 0;
- void *ptr = NULL;
- phys_addr_t phys;
-
- while (1) {
- pool = dma_guess_pool(dev, pool);
- if (!pool) {
- WARN(1, "Failed to get suitable pool for %s\n",
- dev_name(dev));
- break;
- }
-
- val = gen_pool_alloc(pool, size);
- if (!val)
- continue;
-
- phys = gen_pool_virt_to_phys(pool, val);
- if (dma_coherent_ok(dev, phys, size))
- break;
-
- gen_pool_free(pool, val, size);
- val = 0;
- }
-
-
- if (val) {
- *ret_page = pfn_to_page(__phys_to_pfn(phys));
- ptr = (void *)val;
- memset(ptr, 0, size);
+ struct page *page;
- if (gen_pool_avail(pool) < atomic_pool_size)
- schedule_work(&atomic_pool_work);
+ while ((pool = dma_guess_pool(pool, gfp))) {
+ page = __dma_alloc_from_pool(dev, size, pool, cpu_addr,
+ phys_addr_ok);
+ if (page)
+ return page;
}
- return ptr;
+ WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));
+ return NULL;
}
bool dma_free_from_pool(struct device *dev, void *start, size_t size)
{
struct gen_pool *pool = NULL;
- while (1) {
- pool = dma_guess_pool(dev, pool);
- if (!pool)
- return false;
-
- if (gen_pool_has_addr(pool, (unsigned long)start, size)) {
- gen_pool_free(pool, (unsigned long)start, size);
- return true;
- }
+ while ((pool = dma_guess_pool(pool, 0))) {
+ if (!gen_pool_has_addr(pool, (unsigned long)start, size))
+ continue;
+ gen_pool_free(pool, (unsigned long)start, size);
+ return true;
}
+
+ return false;
}
--
2.28.0
There is no guarantee to CMA's placement, so allocating a zone specific
atomic pool from CMA might return memory from a completely different
memory zone. To get around this double check CMA's placement before
allocating from it.
Signed-off-by: Nicolas Saenz Julienne <[email protected]>
[hch: rebased, added a fallback to the page allocator, allow dipping into
lower CMA pools]
Signed-off-by: Christoph Hellwig <[email protected]>
---
Changes since v2:
- Go back to v1 behavior
kernel/dma/pool.c | 31 ++++++++++++++++++++++++++++++-
1 file changed, 30 insertions(+), 1 deletion(-)
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 5d071d4a3cba..30d28d761490 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -3,7 +3,9 @@
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2020 Google LLC
*/
+#include <linux/cma.h>
#include <linux/debugfs.h>
+#include <linux/dma-contiguous.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
#include <linux/init.h>
@@ -55,6 +57,29 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size)
pool_size_kernel += size;
}
+static bool cma_in_zone(gfp_t gfp)
+{
+ unsigned long size;
+ phys_addr_t end;
+ struct cma *cma;
+
+ cma = dev_get_cma_area(NULL);
+ if (!cma)
+ return false;
+
+ size = cma_get_size(cma);
+ if (!size)
+ return false;
+
+ /* CMA can't cross zone boundaries, see cma_activate_area() */
+ end = cma_get_base(cma) - memblock_start_of_DRAM() + size - 1;
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
+ return end <= DMA_BIT_MASK(zone_dma_bits);
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
+ return end <= DMA_BIT_MASK(32);
+ return true;
+}
+
static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
gfp_t gfp)
{
@@ -68,7 +93,11 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
do {
pool_size = 1 << (PAGE_SHIFT + order);
- page = alloc_pages(gfp, order);
+ if (cma_in_zone(gfp))
+ page = dma_alloc_from_contiguous(NULL, 1 << order,
+ order, false);
+ if (!page)
+ page = alloc_pages(gfp, order);
} while (!page && order-- > 0);
if (!page)
goto out;
--
2.28.0
Hi Nicolas,
I love your patch! Perhaps something to improve:
[auto build test WARNING on iommu/next]
[also build test WARNING on linus/master v5.8]
[cannot apply to next-20200806]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Nicolas-Saenz-Julienne/dma-pool-fixes/20200807-025101
base: https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
config: x86_64-randconfig-s031-20200806 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.2-117-g8c7aee71-dirty
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>, old ones prefixed by <<):
>> WARNING: modpost: vmlinux.o(.text+0x2840fa): Section mismatch in reference from the function atomic_pool_expand() to the function .meminit.text:memblock_start_of_DRAM()
The function atomic_pool_expand() references
the function __meminit memblock_start_of_DRAM().
This is often because atomic_pool_expand lacks a __meminit
annotation or the annotation of memblock_start_of_DRAM is wrong.
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On Thu, Aug 06, 2020 at 08:47:55PM +0200, Nicolas Saenz Julienne wrote:
> There is no guarantee to CMA's placement, so allocating a zone specific
> atomic pool from CMA might return memory from a completely different
> memory zone. To get around this double check CMA's placement before
> allocating from it.
As the builtbot pointed out, memblock_start_of_DRAM can't be used from
non-__init code. But lookig at it I think throwing that in
is bogus anyway, as cma_get_base returns a proper physical address
already.
On Fri, 2020-08-07 at 07:21 +0200, Christoph Hellwig wrote:
> On Thu, Aug 06, 2020 at 08:47:55PM +0200, Nicolas Saenz Julienne wrote:
> > There is no guarantee to CMA's placement, so allocating a zone specific
> > atomic pool from CMA might return memory from a completely different
> > memory zone. To get around this double check CMA's placement before
> > allocating from it.
>
> As the builtbot pointed out, memblock_start_of_DRAM can't be used from
> non-__init code. But lookig at it I think throwing that in
> is bogus anyway, as cma_get_base returns a proper physical address
> already.
It does indeed, but I'm comparing CMA's base with bitmasks that don't take into
account where the memory starts. Say memory starts at 0x80000000, and CMA falls
into ZONE_DMA [0x80000000 0xC0000000], if you want to compare it with
DMA_BIT_MASK(zone_dma_bits) you're forced to unify the memory bases.
That said, I now realize that this doesn't work for ZONE_DMA32 which has a hard
limit on 32bit addresses reglardless of the memory base.
That said I still need to call memblock_start_of_DRAM() any suggestions WRT
that? I could save the value in dma_atomic_pool_init(), which is __init code.
On Fri, Aug 07, 2020 at 10:50:19AM +0200, Nicolas Saenz Julienne wrote:
> On Fri, 2020-08-07 at 07:21 +0200, Christoph Hellwig wrote:
> > On Thu, Aug 06, 2020 at 08:47:55PM +0200, Nicolas Saenz Julienne wrote:
> > > There is no guarantee to CMA's placement, so allocating a zone specific
> > > atomic pool from CMA might return memory from a completely different
> > > memory zone. To get around this double check CMA's placement before
> > > allocating from it.
> >
> > As the builtbot pointed out, memblock_start_of_DRAM can't be used from
> > non-__init code. But lookig at it I think throwing that in
> > is bogus anyway, as cma_get_base returns a proper physical address
> > already.
>
> It does indeed, but I'm comparing CMA's base with bitmasks that don't take into
> account where the memory starts. Say memory starts at 0x80000000, and CMA falls
> into ZONE_DMA [0x80000000 0xC0000000], if you want to compare it with
> DMA_BIT_MASK(zone_dma_bits) you're forced to unify the memory bases.
>
> That said, I now realize that this doesn't work for ZONE_DMA32 which has a hard
> limit on 32bit addresses reglardless of the memory base.
>
> That said I still need to call memblock_start_of_DRAM() any suggestions WRT
> that? I could save the value in dma_atomic_pool_init(), which is __init code.
The pool must be about a 32-bit physical address. The offsets can be
different for every device..
On Sat, Aug 08, 2020 at 08:33:54AM +0200, Christoph Hellwig wrote:
> On Fri, Aug 07, 2020 at 10:50:19AM +0200, Nicolas Saenz Julienne wrote:
> > On Fri, 2020-08-07 at 07:21 +0200, Christoph Hellwig wrote:
> > > On Thu, Aug 06, 2020 at 08:47:55PM +0200, Nicolas Saenz Julienne wrote:
> > > > There is no guarantee to CMA's placement, so allocating a zone specific
> > > > atomic pool from CMA might return memory from a completely different
> > > > memory zone. To get around this double check CMA's placement before
> > > > allocating from it.
> > >
> > > As the builtbot pointed out, memblock_start_of_DRAM can't be used from
> > > non-__init code. But lookig at it I think throwing that in
> > > is bogus anyway, as cma_get_base returns a proper physical address
> > > already.
> >
> > It does indeed, but I'm comparing CMA's base with bitmasks that don't take into
> > account where the memory starts. Say memory starts at 0x80000000, and CMA falls
> > into ZONE_DMA [0x80000000 0xC0000000], if you want to compare it with
> > DMA_BIT_MASK(zone_dma_bits) you're forced to unify the memory bases.
> >
> > That said, I now realize that this doesn't work for ZONE_DMA32 which has a hard
> > limit on 32bit addresses reglardless of the memory base.
> >
> > That said I still need to call memblock_start_of_DRAM() any suggestions WRT
> > that? I could save the value in dma_atomic_pool_init(), which is __init code.
>
> The pool must be about a 32-bit physical address. The offsets can be
> different for every device..
Do you plan to resend this one without the memblock_start_of_DRAM
thingy?
On Fri, 2020-08-14 at 08:06 +0200, Christoph Hellwig wrote:
> On Sat, Aug 08, 2020 at 08:33:54AM +0200, Christoph Hellwig wrote:
> > On Fri, Aug 07, 2020 at 10:50:19AM +0200, Nicolas Saenz Julienne wrote:
> > > On Fri, 2020-08-07 at 07:21 +0200, Christoph Hellwig wrote:
> > > > On Thu, Aug 06, 2020 at 08:47:55PM +0200, Nicolas Saenz Julienne wrote:
> > > > > There is no guarantee to CMA's placement, so allocating a zone specific
> > > > > atomic pool from CMA might return memory from a completely different
> > > > > memory zone. To get around this double check CMA's placement before
> > > > > allocating from it.
> > > >
> > > > As the builtbot pointed out, memblock_start_of_DRAM can't be used from
> > > > non-__init code. But lookig at it I think throwing that in
> > > > is bogus anyway, as cma_get_base returns a proper physical address
> > > > already.
> > >
> > > It does indeed, but I'm comparing CMA's base with bitmasks that don't take into
> > > account where the memory starts. Say memory starts at 0x80000000, and CMA falls
> > > into ZONE_DMA [0x80000000 0xC0000000], if you want to compare it with
> > > DMA_BIT_MASK(zone_dma_bits) you're forced to unify the memory bases.
> > >
> > > That said, I now realize that this doesn't work for ZONE_DMA32 which has a hard
> > > limit on 32bit addresses reglardless of the memory base.
> > >
> > > That said I still need to call memblock_start_of_DRAM() any suggestions WRT
> > > that? I could save the value in dma_atomic_pool_init(), which is __init code.
> >
> > The pool must be about a 32-bit physical address. The offsets can be
> > different for every device..
I now see what you mean.
I was trying to blindly fit CMA with arm64's DMA zone setup, which, as it turns
out, doesn't really honor its purpose. arm64 introduced ZONE_DMA to provide a
30-bit address space, but we're creating it regardless of whether it exists or
not. This creates a mismatch between zone_dma_bits and ZONE_DMA's real
placement. I'll try to look at fixing that in arm64.
> Do you plan to resend this one without the memblock_start_of_DRAM
> thingy?
Yes, sorry for the wait, I've been on vacation and short on time, I'll send it
during the day.
Regards,
Nicolas