2022-12-16 20:29:37

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 00/11] dmapool enhancements

From: Keith Busch <[email protected]>

The time spent in dma_pool alloc/free goes up linearly as the number of
pages backing the pool increases. We can reduce this to constant time
with some minor changes to how free pages are tracked.

Changes since previous version:

Fixed dmapool_test to use appropriate dma_op accessors

Fixed up comment in dmapool about the data structures

Fixed dma pool boundary checks when initializing the pool

And since I mess up the boundary checks for odd alignment, I updated
dmapool_test allow setting arbitrary alignment and boundary to catch
the same case that failed in the previous version, reported here:

https://lore.kernel.org/oe-lkp/[email protected]/

Keith Busch (7):
dmapool: add alloc/free performance test
dmapool: move debug code to own functions
dmapool: rearrange page alloc failure handling
dmapool: consolidate page initialization
dmapool: simplify freeing
dmapool: don't memset on free twice
dmapool: link blocks across pages

Tony Battersby (4):
dmapool: remove checks for dev == NULL
dmapool: use sysfs_emit() instead of scnprintf()
dmapool: cleanup integer types
dmapool: speedup DMAPOOL_DEBUG with init_on_alloc

mm/Kconfig | 9 ++
mm/Makefile | 1 +
mm/dmapool.c | 356 ++++++++++++++++++++++------------------------
mm/dmapool_test.c | 146 +++++++++++++++++++
4 files changed, 324 insertions(+), 188 deletions(-)
create mode 100644 mm/dmapool_test.c

--
2.30.2


2022-12-16 20:33:20

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 01/11] dmapool: add alloc/free performance test

From: Keith Busch <[email protected]>

Provide a module that allocates and frees many blocks of various sizes
and report how long it takes. This is intended to provide a consistent
way to measure how changes to the dma_pool_alloc/free routines affect
timing.

Signed-off-by: Keith Busch <[email protected]>
---
v1->v2:

Used appropriate dma_ops accessor, fixing:

https://lore.kernel.org/all/[email protected]/

Added ability to test arbitrary alignments and boundaries and added an
entry to test the previously failing test case:

https://lore.kernel.org/oe-lkp/[email protected]/

mm/Kconfig | 9 +++
mm/Makefile | 1 +
mm/dmapool_test.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+)
create mode 100644 mm/dmapool_test.c

diff --git a/mm/Kconfig b/mm/Kconfig
index 34d36958b8ac9..3d71acc29c10f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1070,6 +1070,15 @@ config GUP_TEST
comment "GUP_TEST needs to have DEBUG_FS enabled"
depends on !GUP_TEST && !DEBUG_FS

+config DMAPOOL_TEST
+ tristate "Enable a module to run time tests on dma_pool"
+ depends on HAS_DMA
+ help
+ Provides a module that will allocate and free many blocks of various
+ sizes and report how long it takes. This is intended to provide a
+ consistent way to measure how changes to the dma_pool_alloc/free
+ routines affect performance.
+
config GUP_GET_PTE_LOW_HIGH
bool

diff --git a/mm/Makefile b/mm/Makefile
index 8e105e5b3e293..3a08f5d7b1782 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_MEMCG) += swap_cgroup.o
endif
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_GUP_TEST) += gup_test.o
+obj-$(CONFIG_DMAPOOL_TEST) += dmapool_test.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
diff --git a/mm/dmapool_test.c b/mm/dmapool_test.c
new file mode 100644
index 0000000000000..c4292dfda3690
--- /dev/null
+++ b/mm/dmapool_test.c
@@ -0,0 +1,146 @@
+#include <linux/device.h>
+#include <linux/dma-map-ops.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/kernel.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+
+#define NR_TESTS (100)
+
+struct dma_pool_pair {
+ dma_addr_t dma;
+ void *v;
+};
+
+struct dmapool_parms {
+ size_t size;
+ size_t align;
+ size_t boundary;
+};
+
+static const struct dmapool_parms pool_parms[] = {
+ { .size = 16, .align = 16, .boundary = 0 },
+ { .size = 64, .align = 64, .boundary = 0 },
+ { .size = 256, .align = 256, .boundary = 0 },
+ { .size = 1024, .align = 1024, .boundary = 0 },
+ { .size = 4096, .align = 4096, .boundary = 0 },
+ { .size = 68, .align = 32, .boundary = 4096 },
+};
+
+static struct dma_pool *pool;
+static struct device test_dev;
+static u64 dma_mask;
+
+static inline int nr_blocks(int size)
+{
+ return clamp_t(int, (PAGE_SIZE / size) * 512, 1024, 8192);
+}
+
+static int dmapool_test_alloc(struct dma_pool_pair *p, int blocks)
+{
+ int i;
+
+ for (i = 0; i < blocks; i++) {
+ p[i].v = dma_pool_alloc(pool, GFP_KERNEL,
+ &p[i].dma);
+ if (!p[i].v)
+ goto pool_fail;
+ }
+
+ for (i = 0; i < blocks; i++)
+ dma_pool_free(pool, p[i].v, p[i].dma);
+
+ return 0;
+
+pool_fail:
+ for (--i; i >= 0; i--)
+ dma_pool_free(pool, p[i].v, p[i].dma);
+ return -ENOMEM;
+}
+
+static int dmapool_test_block(const struct dmapool_parms *parms)
+{
+ int blocks = nr_blocks(parms->size);
+ ktime_t start_time, end_time;
+ struct dma_pool_pair *p;
+ int i, ret;
+
+ p = kcalloc(blocks, sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ pool = dma_pool_create("test pool", &test_dev, parms->size,
+ parms->align, parms->boundary);
+ if (!pool) {
+ ret = -ENOMEM;
+ goto free_pairs;
+ }
+
+ start_time = ktime_get();
+ for (i = 0; i < NR_TESTS; i++) {
+ ret = dmapool_test_alloc(p, blocks);
+ if (ret)
+ goto free_pool;
+ if (need_resched())
+ cond_resched();
+ }
+ end_time = ktime_get();
+
+ printk("dmapool test: size:%-4ld blocks:%-6d time:%llu\n", parms->size,
+ blocks, ktime_us_delta(end_time, start_time));
+
+free_pool:
+ dma_pool_destroy(pool);
+free_pairs:
+ kfree(p);
+ return ret;
+}
+
+static void dmapool_test_release(struct device *dev)
+{
+}
+
+static int dmapool_checks(void)
+{
+ int i, ret;
+
+ ret = dev_set_name(&test_dev, "dmapool-test");
+ if (ret)
+ return ret;
+
+ ret = device_register(&test_dev);
+ if (ret) {
+ printk("%s: register failed:%d\n", __func__, ret);
+ goto put_device;
+ }
+
+ test_dev.release = dmapool_test_release;
+ set_dma_ops(&test_dev, NULL);
+ test_dev.dma_mask = &dma_mask;
+ ret = dma_set_mask_and_coherent(&test_dev, DMA_BIT_MASK(64));
+ if (ret) {
+ printk("%s: mask failed:%d\n", __func__, ret);
+ goto del_device;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(pool_parms); i++) {
+ ret = dmapool_test_block(&pool_parms[i]);
+ if (ret)
+ break;
+ }
+
+del_device:
+ device_del(&test_dev);
+put_device:
+ put_device(&test_dev);
+ return ret;
+}
+
+static void dmapool_exit(void)
+{
+}
+
+module_init(dmapool_checks);
+module_exit(dmapool_exit);
+MODULE_LICENSE("GPL");
--
2.30.2

2022-12-16 20:33:41

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 02/11] dmapool: remove checks for dev == NULL

From: Tony Battersby <[email protected]>

dmapool originally tried to support pools without a device because
dma_alloc_coherent() supports allocations without a device. But nobody
ended up using dma pools without a device, and trying to do so will
result in an oops. So remove the checks for pool->dev == NULL since they
are unneeded bloat.

Signed-off-by: Tony Battersby <[email protected]>
[added check for null dev on create]
Signed-off-by: Keith Busch <[email protected]>
---
mm/dmapool.c | 45 ++++++++++++++-------------------------------
1 file changed, 14 insertions(+), 31 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index a7eb5d0eb2da7..559207e1c3339 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -134,6 +134,9 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
size_t allocation;
bool empty = false;

+ if (!dev)
+ return NULL;
+
if (align == 0)
align = 1;
else if (align & (align - 1))
@@ -275,7 +278,7 @@ void dma_pool_destroy(struct dma_pool *pool)
mutex_lock(&pools_reg_lock);
mutex_lock(&pools_lock);
list_del(&pool->pools);
- if (pool->dev && list_empty(&pool->dev->dma_pools))
+ if (list_empty(&pool->dev->dma_pools))
empty = true;
mutex_unlock(&pools_lock);
if (empty)
@@ -284,12 +287,8 @@ void dma_pool_destroy(struct dma_pool *pool)

list_for_each_entry_safe(page, tmp, &pool->page_list, page_list) {
if (is_page_busy(page)) {
- if (pool->dev)
- dev_err(pool->dev, "%s %s, %p busy\n", __func__,
- pool->name, page->vaddr);
- else
- pr_err("%s %s, %p busy\n", __func__,
- pool->name, page->vaddr);
+ dev_err(pool->dev, "%s %s, %p busy\n", __func__,
+ pool->name, page->vaddr);
/* leak the still-in-use consistent memory */
list_del(&page->page_list);
kfree(page);
@@ -351,12 +350,8 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
for (i = sizeof(page->offset); i < pool->size; i++) {
if (data[i] == POOL_POISON_FREED)
continue;
- if (pool->dev)
- dev_err(pool->dev, "%s %s, %p (corrupted)\n",
- __func__, pool->name, retval);
- else
- pr_err("%s %s, %p (corrupted)\n",
- __func__, pool->name, retval);
+ dev_err(pool->dev, "%s %s, %p (corrupted)\n",
+ __func__, pool->name, retval);

/*
* Dump the first 4 bytes even if they are not
@@ -411,12 +406,8 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
page = pool_find_page(pool, dma);
if (!page) {
spin_unlock_irqrestore(&pool->lock, flags);
- if (pool->dev)
- dev_err(pool->dev, "%s %s, %p/%pad (bad dma)\n",
- __func__, pool->name, vaddr, &dma);
- else
- pr_err("%s %s, %p/%pad (bad dma)\n",
- __func__, pool->name, vaddr, &dma);
+ dev_err(pool->dev, "%s %s, %p/%pad (bad dma)\n",
+ __func__, pool->name, vaddr, &dma);
return;
}

@@ -426,12 +417,8 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
#ifdef DMAPOOL_DEBUG
if ((dma - page->dma) != offset) {
spin_unlock_irqrestore(&pool->lock, flags);
- if (pool->dev)
- dev_err(pool->dev, "%s %s, %p (bad vaddr)/%pad\n",
- __func__, pool->name, vaddr, &dma);
- else
- pr_err("%s %s, %p (bad vaddr)/%pad\n",
- __func__, pool->name, vaddr, &dma);
+ dev_err(pool->dev, "%s %s, %p (bad vaddr)/%pad\n",
+ __func__, pool->name, vaddr, &dma);
return;
}
{
@@ -442,12 +429,8 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
continue;
}
spin_unlock_irqrestore(&pool->lock, flags);
- if (pool->dev)
- dev_err(pool->dev, "%s %s, dma %pad already free\n",
- __func__, pool->name, &dma);
- else
- pr_err("%s %s, dma %pad already free\n",
- __func__, pool->name, &dma);
+ dev_err(pool->dev, "%s %s, dma %pad already free\n",
+ __func__, pool->name, &dma);
return;
}
}
--
2.30.2

2022-12-16 20:33:59

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 09/11] dmapool: simplify freeing

From: Keith Busch <[email protected]>

The actions for busy and not busy are mostly the same, so combine these
and remove the unnecessary function. Also, the pool is about to be freed
so there's no need to poison the page data since we only check for
poison on alloc, which can't be done on a freed pool.

Signed-off-by: Keith Busch <[email protected]>
---
mm/dmapool.c | 29 ++++++-----------------------
1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index 33d20ceff18c5..44622f2bf4641 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -241,18 +241,6 @@ static inline bool is_page_busy(struct dma_page *page)
return page->in_use != 0;
}

-static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
-{
- dma_addr_t dma = page->dma;
-
-#ifdef DMAPOOL_DEBUG
- memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
-#endif
- dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
- list_del(&page->page_list);
- kfree(page);
-}
-
/**
* dma_pool_destroy - destroys a pool of dma memory blocks.
* @pool: dma pool that will be destroyed
@@ -280,14 +268,14 @@ void dma_pool_destroy(struct dma_pool *pool)
mutex_unlock(&pools_reg_lock);

list_for_each_entry_safe(page, tmp, &pool->page_list, page_list) {
- if (is_page_busy(page)) {
+ if (!is_page_busy(page))
+ dma_free_coherent(pool->dev, pool->allocation,
+ page->vaddr, page->dma);
+ else
dev_err(pool->dev, "%s %s, %p busy\n", __func__,
pool->name, page->vaddr);
- /* leak the still-in-use consistent memory */
- list_del(&page->page_list);
- kfree(page);
- } else
- pool_free_page(pool, page);
+ list_del(&page->page_list);
+ kfree(page);
}

kfree(pool);
@@ -445,11 +433,6 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
page->in_use--;
*(int *)vaddr = page->offset;
page->offset = offset;
- /*
- * Resist a temptation to do
- * if (!is_page_busy(page)) pool_free_page(pool, page);
- * Better have a few empty pages hang around.
- */
spin_unlock_irqrestore(&pool->lock, flags);
}
EXPORT_SYMBOL(dma_pool_free);
--
2.30.2

2022-12-16 20:51:31

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 06/11] dmapool: move debug code to own functions

From: Keith Busch <[email protected]>

Clean up the normal path by moving the debug code outside it.

Signed-off-by: Keith Busch <[email protected]>
---
mm/dmapool.c | 96 +++++++++++++++++++++++++++++-----------------------
1 file changed, 54 insertions(+), 42 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index eaed3ffb42aa8..8a7aa19e650a1 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -294,6 +294,32 @@ void dma_pool_destroy(struct dma_pool *pool)
}
EXPORT_SYMBOL(dma_pool_destroy);

+static inline void pool_check_block(struct dma_pool *pool, void *retval,
+ unsigned int offset, gfp_t mem_flags)
+{
+#ifdef DMAPOOL_DEBUG
+ int i;
+ u8 *data = retval;
+ /* page->offset is stored in first 4 bytes */
+ for (i = sizeof(offset); i < pool->size; i++) {
+ if (data[i] == POOL_POISON_FREED)
+ continue;
+ dev_err(pool->dev, "%s %s, %p (corrupted)\n",
+ __func__, pool->name, retval);
+
+ /*
+ * Dump the first 4 bytes even if they are not
+ * POOL_POISON_FREED
+ */
+ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1,
+ data, pool->size, 1);
+ break;
+ }
+ if (!want_init_on_alloc(mem_flags))
+ memset(retval, POOL_POISON_ALLOCATED, pool->size);
+#endif
+}
+
/**
* dma_pool_alloc - get a block of consistent memory
* @pool: dma pool that will produce the block
@@ -336,29 +362,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
page->offset = *(int *)(page->vaddr + offset);
retval = offset + page->vaddr;
*handle = offset + page->dma;
-#ifdef DMAPOOL_DEBUG
- {
- int i;
- u8 *data = retval;
- /* page->offset is stored in first 4 bytes */
- for (i = sizeof(page->offset); i < pool->size; i++) {
- if (data[i] == POOL_POISON_FREED)
- continue;
- dev_err(pool->dev, "%s %s, %p (corrupted)\n",
- __func__, pool->name, retval);
-
- /*
- * Dump the first 4 bytes even if they are not
- * POOL_POISON_FREED
- */
- print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1,
- data, pool->size, 1);
- break;
- }
- }
- if (!want_init_on_alloc(mem_flags))
- memset(retval, POOL_POISON_ALLOCATED, pool->size);
-#endif
+ pool_check_block(pool, retval, offset, mem_flags);
spin_unlock_irqrestore(&pool->lock, flags);

if (want_init_on_alloc(mem_flags))
@@ -381,6 +385,32 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
return NULL;
}

+static inline bool pool_page_err(struct dma_pool *pool, struct dma_page *page,
+ void *vaddr)
+{
+#ifdef DMAPOOL_DEBUG
+ unsigned int chain = page->offset;
+
+ if ((dma - page->dma) != offset) {
+ dev_err(pool->dev, "%s %s, %p (bad vaddr)/%pad\n",
+ __func__, pool->name, vaddr, &dma);
+ return true;
+ }
+
+ while (chain < pool->allocation) {
+ if (chain != offset) {
+ chain = *(int *)(page->vaddr + chain);
+ continue;
+ }
+ dev_err(pool->dev, "%s %s, dma %pad already free\n",
+ __func__, pool->name, &dma);
+ return true;
+ }
+ memset(vaddr, POOL_POISON_FREED, pool->size);
+#endif
+ return false;
+}
+
/**
* dma_pool_free - put block back into dma pool
* @pool: the dma pool holding the block
@@ -408,28 +438,10 @@ void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
offset = vaddr - page->vaddr;
if (want_init_on_free())
memset(vaddr, 0, pool->size);
-#ifdef DMAPOOL_DEBUG
- if ((dma - page->dma) != offset) {
+ if (pool_page_err(pool, page, vaddr)) {
spin_unlock_irqrestore(&pool->lock, flags);
- dev_err(pool->dev, "%s %s, %p (bad vaddr)/%pad\n",
- __func__, pool->name, vaddr, &dma);
return;
}
- {
- unsigned int chain = page->offset;
- while (chain < pool->allocation) {
- if (chain != offset) {
- chain = *(int *)(page->vaddr + chain);
- continue;
- }
- spin_unlock_irqrestore(&pool->lock, flags);
- dev_err(pool->dev, "%s %s, dma %pad already free\n",
- __func__, pool->name, &dma);
- return;
- }
- }
- memset(vaddr, POOL_POISON_FREED, pool->size);
-#endif

page->in_use--;
*(int *)vaddr = page->offset;
--
2.30.2

2022-12-16 20:52:26

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 04/11] dmapool: cleanup integer types

From: Tony Battersby <[email protected]>

To represent the size of a single allocation, dmapool currently uses
'unsigned int' in some places and 'size_t' in other places. Standardize
on 'unsigned int' to reduce overhead, but use 'size_t' when counting all
the blocks in the entire pool.

Signed-off-by: Tony Battersby <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
mm/dmapool.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index 20616b760bb9c..ee993bb59fc27 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -43,10 +43,10 @@
struct dma_pool { /* the pool */
struct list_head page_list;
spinlock_t lock;
- size_t size;
struct device *dev;
- size_t allocation;
- size_t boundary;
+ unsigned int size;
+ unsigned int allocation;
+ unsigned int boundary;
char name[32];
struct list_head pools;
};
@@ -73,7 +73,7 @@ static ssize_t pools_show(struct device *dev, struct device_attribute *attr, cha
mutex_lock(&pools_lock);
list_for_each_entry(pool, &dev->dma_pools, pools) {
unsigned pages = 0;
- unsigned blocks = 0;
+ size_t blocks = 0;

spin_lock_irq(&pool->lock);
list_for_each_entry(page, &pool->page_list, page_list) {
@@ -83,9 +83,10 @@ static ssize_t pools_show(struct device *dev, struct device_attribute *attr, cha
spin_unlock_irq(&pool->lock);

/* per-pool info, no real statistics yet */
- size += sysfs_emit_at(buf, size, "%-16s %4u %4zu %4zu %2u\n",
+ size += sysfs_emit_at(buf, size, "%-16s %4zu %4zu %4u %2u\n",
pool->name, blocks,
- pages * (pool->allocation / pool->size),
+ (size_t) pages *
+ (pool->allocation / pool->size),
pool->size, pages);
}
mutex_unlock(&pools_lock);
@@ -133,7 +134,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
else if (align & (align - 1))
return NULL;

- if (size == 0)
+ if (size == 0 || size > INT_MAX)
return NULL;
else if (size < 4)
size = 4;
@@ -146,6 +147,8 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
else if ((boundary < size) || (boundary & (boundary - 1)))
return NULL;

+ boundary = min(boundary, allocation);
+
retval = kmalloc(sizeof(*retval), GFP_KERNEL);
if (!retval)
return retval;
@@ -306,7 +309,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
{
unsigned long flags;
struct dma_page *page;
- size_t offset;
+ unsigned int offset;
void *retval;

might_alloc(mem_flags);
--
2.30.2

2022-12-16 20:52:35

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 03/11] dmapool: use sysfs_emit() instead of scnprintf()

From: Tony Battersby <[email protected]>

Use sysfs_emit instead of scnprintf, snprintf or sprintf.

Signed-off-by: Tony Battersby <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
mm/dmapool.c | 23 +++++++----------------
1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index 559207e1c3339..20616b760bb9c 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -64,18 +64,11 @@ static DEFINE_MUTEX(pools_reg_lock);

static ssize_t pools_show(struct device *dev, struct device_attribute *attr, char *buf)
{
- unsigned temp;
- unsigned size;
- char *next;
+ int size;
struct dma_page *page;
struct dma_pool *pool;

- next = buf;
- size = PAGE_SIZE;
-
- temp = scnprintf(next, size, "poolinfo - 0.1\n");
- size -= temp;
- next += temp;
+ size = sysfs_emit(buf, "poolinfo - 0.1\n");

mutex_lock(&pools_lock);
list_for_each_entry(pool, &dev->dma_pools, pools) {
@@ -90,16 +83,14 @@ static ssize_t pools_show(struct device *dev, struct device_attribute *attr, cha
spin_unlock_irq(&pool->lock);

/* per-pool info, no real statistics yet */
- temp = scnprintf(next, size, "%-16s %4u %4zu %4zu %2u\n",
- pool->name, blocks,
- pages * (pool->allocation / pool->size),
- pool->size, pages);
- size -= temp;
- next += temp;
+ size += sysfs_emit_at(buf, size, "%-16s %4u %4zu %4zu %2u\n",
+ pool->name, blocks,
+ pages * (pool->allocation / pool->size),
+ pool->size, pages);
}
mutex_unlock(&pools_lock);

- return PAGE_SIZE - size;
+ return size;
}

static DEVICE_ATTR_RO(pools);
--
2.30.2

2022-12-16 21:02:58

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 05/11] dmapool: speedup DMAPOOL_DEBUG with init_on_alloc

From: Tony Battersby <[email protected]>

Avoid double-memset of the same allocated memory in dma_pool_alloc()
when both DMAPOOL_DEBUG is enabled and init_on_alloc=1.

Signed-off-by: Tony Battersby <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
mm/dmapool.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/dmapool.c b/mm/dmapool.c
index ee993bb59fc27..eaed3ffb42aa8 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -356,7 +356,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
break;
}
}
- if (!(mem_flags & __GFP_ZERO))
+ if (!want_init_on_alloc(mem_flags))
memset(retval, POOL_POISON_ALLOCATED, pool->size);
#endif
spin_unlock_irqrestore(&pool->lock, flags);
--
2.30.2

2022-12-23 16:43:37

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCHv2 04/11] dmapool: cleanup integer types

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2022-12-23 16:58:21

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCHv2 03/11] dmapool: use sysfs_emit() instead of scnprintf()

> + size += sysfs_emit_at(buf, size, "%-16s %4u %4zu %4zu %2u\n",
> + pool->name, blocks,
> + pages * (pool->allocation / pool->size),
> + pool->size, pages);

Did I mention that the sysfs_emit_at API sucks and should just take
size as a pointer argument an auto-increment?

Not really something we can change in this series, though. So:

Reviewed-by: Christoph Hellwig <[email protected]>

2022-12-23 17:18:52

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCHv2 02/11] dmapool: remove checks for dev == NULL

Looks good:

Reviewed-by: Christoph Hellwig <[email protected]>

2022-12-23 17:39:11

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCHv2 06/11] dmapool: move debug code to own functions

> +static inline void pool_check_block(struct dma_pool *pool, void *retval,
> + unsigned int offset, gfp_t mem_flags)
> +{
> +#ifdef DMAPOOL_DEBUG

We normally keep the ifdef outside the function and provide stubs.
Extra points for keeping all the debug helpers in a single ifdef
block.

2022-12-23 17:51:42

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCHv2 09/11] dmapool: simplify freeing

> @@ -280,14 +268,14 @@ void dma_pool_destroy(struct dma_pool *pool)
> mutex_unlock(&pools_reg_lock);
>
> list_for_each_entry_safe(page, tmp, &pool->page_list, page_list) {
> + if (!is_page_busy(page))
> + dma_free_coherent(pool->dev, pool->allocation,
> + page->vaddr, page->dma);
> + else
> dev_err(pool->dev, "%s %s, %p busy\n", __func__,
> pool->name, page->vaddr);
> + list_del(&page->page_list);
> + kfree(page);

Hmm. The is_page_busy case is really a should not happen case.
What is the benefit of skipping the dma_free_coherent and leaking
memory here, vs letting KASAN and friends see the free and possibly
help with debugging? In other words, why is this not:

WARN_ON_ONCE(is_page_busy(page));
dma_free_coherent(pool->dev, pool->allocation, page->vaddr,
page->dma);
...

> page->in_use--;
> *(int *)vaddr = page->offset;
> page->offset = offset;
> - /*
> - * Resist a temptation to do
> - * if (!is_page_busy(page)) pool_free_page(pool, page);
> - * Better have a few empty pages hang around.
> - */

This doesn't look related to the rest, or am I missing something?

2022-12-27 20:56:33

by Keith Busch

[permalink] [raw]
Subject: Re: [PATCHv2 09/11] dmapool: simplify freeing

On Fri, Dec 23, 2022 at 08:38:55AM -0800, Christoph Hellwig wrote:
> > @@ -280,14 +268,14 @@ void dma_pool_destroy(struct dma_pool *pool)
> > mutex_unlock(&pools_reg_lock);
> >
> > list_for_each_entry_safe(page, tmp, &pool->page_list, page_list) {
> > + if (!is_page_busy(page))
> > + dma_free_coherent(pool->dev, pool->allocation,
> > + page->vaddr, page->dma);
> > + else
> > dev_err(pool->dev, "%s %s, %p busy\n", __func__,
> > pool->name, page->vaddr);
> > + list_del(&page->page_list);
> > + kfree(page);
>
> Hmm. The is_page_busy case is really a should not happen case.
> What is the benefit of skipping the dma_free_coherent and leaking
> memory here, vs letting KASAN and friends see the free and possibly
> help with debugging? In other words, why is this not:
>
> WARN_ON_ONCE(is_page_busy(page));
> dma_free_coherent(pool->dev, pool->allocation, page->vaddr,
> page->dma);
> ...

The memory is presumed to still be owned by the device in this case, so
the kernel shouldn't free it. I don't think KASAN will be very helpful
if the device corrupts memory.

> > page->in_use--;
> > *(int *)vaddr = page->offset;
> > page->offset = offset;
> > - /*
> > - * Resist a temptation to do
> > - * if (!is_page_busy(page)) pool_free_page(pool, page);
> > - * Better have a few empty pages hang around.
> > - */
>
> This doesn't look related to the rest, or am I missing something?

Oops, this was supposed to go with a later patch in this series that
removed "is_page_busy()".