LinuxLists.cc - [RFC][PATCH v6 0/7] Generic page pool & deferred freeing for system dmabuf heap

[permalink] [raw]

Subject: [RFC][PATCH v6 5/7] dma-buf: heaps: Add deferred-free-helper library code

This patch provides infrastructure for deferring buffer frees.

This is a feature ION provided which when used with some form
of a page pool, provides a nice performance boost in an
allocation microbenchmark. The reason it helps is it allows the
page-zeroing to be done out of the normal allocation/free path,
and pushed off to a kthread.

As not all heaps will find this useful, its implemented as
a optional helper library that heaps can utilize.

Cc: Daniel Vetter <[email protected]>
Cc: Christian Koenig <[email protected]>
Cc: Sumit Semwal <[email protected]>
Cc: Liam Mark <[email protected]>
Cc: Chris Goldsworthy <[email protected]>
Cc: Laura Abbott <[email protected]>
Cc: Brian Starkey <[email protected]>
Cc: Hridya Valsaraju <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Sandeep Patil <[email protected]>
Cc: Daniel Mentz <[email protected]>
Cc: Ørjan Eide <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Ezequiel Garcia <[email protected]>
Cc: Simon Ser <[email protected]>
Cc: James Jones <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: John Stultz <[email protected]>
---
v2:
* Fix sleep in atomic issue from using a mutex, by switching
to a spinlock as Reported-by: kernel test robot <[email protected]>
* Cleanup API to use a reason enum for clarity and add some documentation
comments as suggested by Suren Baghdasaryan.
v3:
* Minor tweaks so it can be built as a module
* A few small fixups suggested by Daniel Mentz
v4:
* Tweak from Daniel Mentz to make sure the shrinker
count/freed values are tracked in pages not bytes
v5:
* Fix up page count tracking as suggested by Suren Baghdasaryan
---
drivers/dma-buf/heaps/Kconfig | 3 +
drivers/dma-buf/heaps/Makefile | 1 +
drivers/dma-buf/heaps/deferred-free-helper.c | 145 +++++++++++++++++++
drivers/dma-buf/heaps/deferred-free-helper.h | 55 +++++++
4 files changed, 204 insertions(+)
create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.c
create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.h

diff --git a/drivers/dma-buf/heaps/Kconfig b/drivers/dma-buf/heaps/Kconfig
index a5eef06c4226..f7aef8bc7119 100644
--- a/drivers/dma-buf/heaps/Kconfig
+++ b/drivers/dma-buf/heaps/Kconfig
@@ -1,3 +1,6 @@
+config DMABUF_HEAPS_DEFERRED_FREE
+ tristate
+
config DMABUF_HEAPS_SYSTEM
bool "DMA-BUF System Heap"
depends on DMABUF_HEAPS
diff --git a/drivers/dma-buf/heaps/Makefile b/drivers/dma-buf/heaps/Makefile
index 974467791032..4e7839875615 100644
--- a/drivers/dma-buf/heaps/Makefile
+++ b/drivers/dma-buf/heaps/Makefile
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DMABUF_HEAPS_DEFERRED_FREE) += deferred-free-helper.o
obj-$(CONFIG_DMABUF_HEAPS_SYSTEM) += system_heap.o
obj-$(CONFIG_DMABUF_HEAPS_CMA) += cma_heap.o
diff --git a/drivers/dma-buf/heaps/deferred-free-helper.c b/drivers/dma-buf/heaps/deferred-free-helper.c
new file mode 100644
index 000000000000..672c3d5872e9
--- /dev/null
+++ b/drivers/dma-buf/heaps/deferred-free-helper.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Deferred dmabuf freeing helper
+ *
+ * Copyright (C) 2020 Linaro, Ltd.
+ *
+ * Based on the ION page pool code
+ * Copyright (C) 2011 Google, Inc.
+ */
+
+#include <linux/freezer.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/sched/signal.h>
+
+#include "deferred-free-helper.h"
+
+static LIST_HEAD(free_list);
+static size_t list_size_pages;
+wait_queue_head_t freelist_waitqueue;
+struct task_struct *freelist_task;
+static DEFINE_SPINLOCK(free_list_lock);
+
+static inline size_t size_to_pages(size_t size)
+{
+ if (!size)
+ return 0;
+ return ((size - 1) >> PAGE_SHIFT) + 1;
+}
+
+void deferred_free(struct deferred_freelist_item *item,
+ void (*free)(struct deferred_freelist_item*,
+ enum df_reason),
+ size_t size)
+{
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&item->list);
+ item->size = size;
+ item->free = free;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+ list_add(&item->list, &free_list);
+ list_size_pages += size_to_pages(size);
+ spin_unlock_irqrestore(&free_list_lock, flags);
+ wake_up(&freelist_waitqueue);
+}
+EXPORT_SYMBOL_GPL(deferred_free);
+
+static size_t free_one_item(enum df_reason reason)
+{
+ unsigned long flags;
+ size_t nr_pages;
+ struct deferred_freelist_item *item;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+ if (list_empty(&free_list)) {
+ spin_unlock_irqrestore(&free_list_lock, flags);
+ return 0;
+ }
+ item = list_first_entry(&free_list, struct deferred_freelist_item, list);
+ list_del(&item->list);
+ nr_pages = size_to_pages(item->size);
+ list_size_pages -= nr_pages;
+ spin_unlock_irqrestore(&free_list_lock, flags);
+
+ item->free(item, reason);
+ return nr_pages;
+}
+
+static unsigned long get_freelist_size_pages(void)
+{
+ unsigned long size;
+ unsigned long flags;
+
+ spin_lock_irqsave(&free_list_lock, flags);
+ size = list_size_pages;
+ spin_unlock_irqrestore(&free_list_lock, flags);
+ return size;
+}
+
+static unsigned long freelist_shrink_count(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ return get_freelist_size_pages();
+}
+
+static unsigned long freelist_shrink_scan(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ unsigned long total_freed = 0;
+
+ if (sc->nr_to_scan == 0)
+ return 0;
+
+ while (total_freed < sc->nr_to_scan) {
+ size_t pages_freed = free_one_item(DF_UNDER_PRESSURE);
+
+ if (!pages_freed)
+ break;
+
+ total_freed += pages_freed;
+ }
+
+ return total_freed;
+}
+
+static struct shrinker freelist_shrinker = {
+ .count_objects = freelist_shrink_count,
+ .scan_objects = freelist_shrink_scan,
+ .seeks = DEFAULT_SEEKS,
+ .batch = 0,
+};
+
+static int deferred_free_thread(void *data)
+{
+ while (true) {
+ wait_event_freezable(freelist_waitqueue,
+ get_freelist_size_pages() > 0);
+
+ free_one_item(DF_NORMAL);
+ }
+
+ return 0;
+}
+
+static int deferred_freelist_init(void)
+{
+ list_size_pages = 0;
+
+ init_waitqueue_head(&freelist_waitqueue);
+ freelist_task = kthread_run(deferred_free_thread, NULL,
+ "%s", "dmabuf-deferred-free-worker");
+ if (IS_ERR(freelist_task)) {
+ pr_err("Creating thread for deferred free failed\n");
+ return -1;
+ }
+ sched_set_normal(freelist_task, 19);
+
+ return register_shrinker(&freelist_shrinker);
+}
+module_init(deferred_freelist_init);
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma-buf/heaps/deferred-free-helper.h b/drivers/dma-buf/heaps/deferred-free-helper.h
new file mode 100644
index 000000000000..18b44ac86ef6
--- /dev/null
+++ b/drivers/dma-buf/heaps/deferred-free-helper.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef DEFERRED_FREE_HELPER_H
+#define DEFERRED_FREE_HELPER_H
+
+/**
+ * df_reason - enum for reason why item was freed
+ *
+ * This provides a reason for why the free function was called
+ * on the item. This is useful when deferred_free is used in
+ * combination with a pagepool, so under pressure the page can
+ * be immediately freed.
+ *
+ * DF_NORMAL: Normal deferred free
+ *
+ * DF_UNDER_PRESSURE: Free was called because the system
+ * is under memory pressure. Usually
+ * from a shrinker. Avoid allocating
+ * memory in the free call, as it may
+ * fail.
+ */
+enum df_reason {
+ DF_NORMAL,
+ DF_UNDER_PRESSURE,
+};
+
+/**
+ * deferred_freelist_item - item structure for deferred freelist
+ *
+ * This is to be added to the structure for whatever you want to
+ * defer freeing on.
+ *
+ * @size: size of the item to be freed
+ * @free: function pointer to be called when freeing the item
+ * @list: list entry for the deferred list
+ */
+struct deferred_freelist_item {
+ size_t size;
+ void (*free)(struct deferred_freelist_item *i,
+ enum df_reason reason);
+ struct list_head list;
+};
+
+/**
+ * deferred_free - call to add item to the deferred free list
+ *
+ * @item: Pointer to deferred_freelist_item field of a structure
+ * @free: Function pointer to the free call
+ * @size: Size of the item to be freed
+ */
+void deferred_free(struct deferred_freelist_item *item,
+ void (*free)(struct deferred_freelist_item *i,
+ enum df_reason reason),
+ size_t size);
+#endif
--
2.25.1

2021-02-05 08:15:38

[permalink] [raw]

Subject: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

This adds a shrinker controlled page pool, closely
following the ttm_pool logic, which is abstracted out
a bit so it can be used by other non-ttm drivers.

Cc: Daniel Vetter <[email protected]>
Cc: Christian Koenig <[email protected]>
Cc: Sumit Semwal <[email protected]>
Cc: Liam Mark <[email protected]>
Cc: Chris Goldsworthy <[email protected]>
Cc: Laura Abbott <[email protected]>
Cc: Brian Starkey <[email protected]>
Cc: Hridya Valsaraju <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: Sandeep Patil <[email protected]>
Cc: Daniel Mentz <[email protected]>
Cc: Ørjan Eide <[email protected]>
Cc: Robin Murphy <[email protected]>
Cc: Ezequiel Garcia <[email protected]>
Cc: Simon Ser <[email protected]>
Cc: James Jones <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: John Stultz <[email protected]>
---
drivers/gpu/drm/Kconfig | 4 +
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/page_pool.c | 220 ++++++++++++++++++++++++++++++++++++
include/drm/page_pool.h | 54 +++++++++
4 files changed, 279 insertions(+)
create mode 100644 drivers/gpu/drm/page_pool.c
create mode 100644 include/drm/page_pool.h

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 0973f408d75f..d16bf340ed2e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -174,6 +174,10 @@ config DRM_DP_CEC
Note: not all adapters support this feature, and even for those
that do support this they often do not hook up the CEC pin.

+config DRM_PAGE_POOL
+ bool
+ depends on DRM
+
config DRM_TTM
tristate
depends on DRM && MMU
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index fefaff4c832d..877e0111ed34 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -32,6 +32,7 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o
drm-$(CONFIG_PCI) += drm_pci.o
drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
+drm-$(CONFIG_DRM_PAGE_POOL) += page_pool.o

drm_vram_helper-y := drm_gem_vram_helper.o
obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
diff --git a/drivers/gpu/drm/page_pool.c b/drivers/gpu/drm/page_pool.c
new file mode 100644
index 000000000000..2139f86e6ca7
--- /dev/null
+++ b/drivers/gpu/drm/page_pool.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DRM page pool system
+ *
+ * Copyright (C) 2020 Linaro Ltd.
+ *
+ * Based on the ION page pool code
+ * Copyright (C) 2011 Google, Inc.
+ * As well as the ttm_pool code
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/freezer.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/sched/signal.h>
+#include <drm/page_pool.h>
+
+static LIST_HEAD(pool_list);
+static DEFINE_MUTEX(pool_list_lock);
+static atomic_long_t total_pages;
+static unsigned long page_pool_max;
+MODULE_PARM_DESC(page_pool_max, "Number of pages in the WC/UC/DMA pool");
+module_param(page_pool_max, ulong, 0644);
+
+void drm_page_pool_set_max(unsigned long max)
+{
+ /* only write once */
+ if (!page_pool_max)
+ page_pool_max = max;
+}
+
+unsigned long drm_page_pool_get_max(void)
+{
+ return page_pool_max;
+}
+
+unsigned long drm_page_pool_get_total(void)
+{
+ return atomic_long_read(&total_pages);
+}
+
+int drm_page_pool_get_size(struct drm_page_pool *pool)
+{
+ int ret;
+
+ spin_lock(&pool->lock);
+ ret = pool->count;
+ spin_unlock(&pool->lock);
+ return ret;
+}
+
+static inline unsigned int drm_page_pool_free_pages(struct drm_page_pool *pool,
+ struct page *page)
+{
+ return pool->free(page, pool->order);
+}
+
+static int drm_page_pool_shrink_one(void);
+
+void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
+{
+ spin_lock(&pool->lock);
+ list_add_tail(&page->lru, &pool->items);
+ pool->count++;
+ atomic_long_add(1 << pool->order, &total_pages);
+ spin_unlock(&pool->lock);
+
+ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+ 1 << pool->order);
+
+ /* make sure we don't grow too large */
+ while (page_pool_max && atomic_long_read(&total_pages) > page_pool_max)
+ drm_page_pool_shrink_one();
+}
+EXPORT_SYMBOL_GPL(drm_page_pool_add);
+
+static struct page *drm_page_pool_remove(struct drm_page_pool *pool)
+{
+ struct page *page;
+
+ if (!pool->count)
+ return NULL;
+
+ page = list_first_entry(&pool->items, struct page, lru);
+ pool->count--;
+ atomic_long_sub(1 << pool->order, &total_pages);
+
+ list_del(&page->lru);
+ mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+ -(1 << pool->order));
+ return page;
+}
+
+struct page *drm_page_pool_fetch(struct drm_page_pool *pool)
+{
+ struct page *page = NULL;
+
+ if (!pool) {
+ WARN_ON(!pool);
+ return NULL;
+ }
+
+ spin_lock(&pool->lock);
+ page = drm_page_pool_remove(pool);
+ spin_unlock(&pool->lock);
+
+ return page;
+}
+EXPORT_SYMBOL_GPL(drm_page_pool_fetch);
+
+struct drm_page_pool *drm_page_pool_create(unsigned int order,
+ int (*free_page)(struct page *p, unsigned int order))
+{
+ struct drm_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL);
+
+ if (!pool)
+ return NULL;
+
+ pool->count = 0;
+ INIT_LIST_HEAD(&pool->items);
+ pool->order = order;
+ pool->free = free_page;
+ spin_lock_init(&pool->lock);
+ INIT_LIST_HEAD(&pool->list);
+
+ mutex_lock(&pool_list_lock);
+ list_add(&pool->list, &pool_list);
+ mutex_unlock(&pool_list_lock);
+
+ return pool;
+}
+EXPORT_SYMBOL_GPL(drm_page_pool_create);
+
+void drm_page_pool_destroy(struct drm_page_pool *pool)
+{
+ struct page *page;
+
+ /* Remove us from the pool list */
+ mutex_lock(&pool_list_lock);
+ list_del(&pool->list);
+ mutex_unlock(&pool_list_lock);
+
+ /* Free any remaining pages in the pool */
+ spin_lock(&pool->lock);
+ while (pool->count) {
+ page = drm_page_pool_remove(pool);
+ spin_unlock(&pool->lock);
+ drm_page_pool_free_pages(pool, page);
+ spin_lock(&pool->lock);
+ }
+ spin_unlock(&pool->lock);
+
+ kfree(pool);
+}
+EXPORT_SYMBOL_GPL(drm_page_pool_destroy);
+
+static int drm_page_pool_shrink_one(void)
+{
+ struct drm_page_pool *pool;
+ struct page *page;
+ int nr_freed = 0;
+
+ mutex_lock(&pool_list_lock);
+ pool = list_first_entry(&pool_list, typeof(*pool), list);
+
+ spin_lock(&pool->lock);
+ page = drm_page_pool_remove(pool);
+ spin_unlock(&pool->lock);
+
+ if (page)
+ nr_freed = drm_page_pool_free_pages(pool, page);
+
+ list_move_tail(&pool->list, &pool_list);
+ mutex_unlock(&pool_list_lock);
+
+ return nr_freed;
+}
+
+static unsigned long drm_page_pool_shrink_count(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ unsigned long count = atomic_long_read(&total_pages);
+
+ return count ? count : SHRINK_EMPTY;
+}
+
+static unsigned long drm_page_pool_shrink_scan(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ int to_scan = sc->nr_to_scan;
+ int nr_total = 0;
+
+ if (to_scan == 0)
+ return 0;
+
+ do {
+ int nr_freed = drm_page_pool_shrink_one();
+
+ to_scan -= nr_freed;
+ nr_total += nr_freed;
+ } while (to_scan >= 0 && atomic_long_read(&total_pages));
+
+ return nr_total;
+}
+
+static struct shrinker pool_shrinker = {
+ .count_objects = drm_page_pool_shrink_count,
+ .scan_objects = drm_page_pool_shrink_scan,
+ .seeks = 1,
+ .batch = 0,
+};
+
+int drm_page_pool_init_shrinker(void)
+{
+ return register_shrinker(&pool_shrinker);
+}
+module_init(drm_page_pool_init_shrinker);
+MODULE_LICENSE("GPL v2");
diff --git a/include/drm/page_pool.h b/include/drm/page_pool.h
new file mode 100644
index 000000000000..47e240b2bc69
--- /dev/null
+++ b/include/drm/page_pool.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König
+ */
+
+#ifndef _DRM_PAGE_POOL_H_
+#define _DRM_PAGE_POOL_H_
+
+#include <linux/mmzone.h>
+#include <linux/llist.h>
+#include <linux/spinlock.h>
+
+struct drm_page_pool {
+ int count;
+ struct list_head items;
+
+ int order;
+ int (*free)(struct page *p, unsigned int order);
+
+ spinlock_t lock;
+ struct list_head list;
+};
+
+void drm_page_pool_set_max(unsigned long max);
+unsigned long drm_page_pool_get_max(void);
+unsigned long drm_page_pool_get_total(void);
+int drm_page_pool_get_size(struct drm_page_pool *pool);
+struct page *drm_page_pool_fetch(struct drm_page_pool *pool);
+void drm_page_pool_add(struct drm_page_pool *pool, struct page *page);
+struct drm_page_pool *drm_page_pool_create(unsigned int order,
+ int (*free_page)(struct page *p, unsigned int order));
+void drm_page_pool_destroy(struct drm_page_pool *pool);
+
+#endif
--
2.25.1

2021-02-05 08:50:07

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

Am 05.02.21 um 09:06 schrieb John Stultz:
> This adds a shrinker controlled page pool, closely
> following the ttm_pool logic, which is abstracted out
> a bit so it can be used by other non-ttm drivers.
>
> Cc: Daniel Vetter <[email protected]>
> Cc: Christian Koenig <[email protected]>
> Cc: Sumit Semwal <[email protected]>
> Cc: Liam Mark <[email protected]>
> Cc: Chris Goldsworthy <[email protected]>
> Cc: Laura Abbott <[email protected]>
> Cc: Brian Starkey <[email protected]>
> Cc: Hridya Valsaraju <[email protected]>
> Cc: Suren Baghdasaryan <[email protected]>
> Cc: Sandeep Patil <[email protected]>
> Cc: Daniel Mentz <[email protected]>
> Cc: Ørjan Eide <[email protected]>
> Cc: Robin Murphy <[email protected]>
> Cc: Ezequiel Garcia <[email protected]>
> Cc: Simon Ser <[email protected]>
> Cc: James Jones <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: John Stultz <[email protected]>
> ---
> drivers/gpu/drm/Kconfig | 4 +
> drivers/gpu/drm/Makefile | 1 +
> drivers/gpu/drm/page_pool.c | 220 ++++++++++++++++++++++++++++++++++++
> include/drm/page_pool.h | 54 +++++++++
> 4 files changed, 279 insertions(+)
> create mode 100644 drivers/gpu/drm/page_pool.c
> create mode 100644 include/drm/page_pool.h
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 0973f408d75f..d16bf340ed2e 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -174,6 +174,10 @@ config DRM_DP_CEC
> Note: not all adapters support this feature, and even for those
> that do support this they often do not hook up the CEC pin.
>
> +config DRM_PAGE_POOL
> + bool
> + depends on DRM
> +
> config DRM_TTM
> tristate
> depends on DRM && MMU
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index fefaff4c832d..877e0111ed34 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -32,6 +32,7 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o
> drm-$(CONFIG_PCI) += drm_pci.o
> drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
> drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
> +drm-$(CONFIG_DRM_PAGE_POOL) += page_pool.o
>
> drm_vram_helper-y := drm_gem_vram_helper.o
> obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
> diff --git a/drivers/gpu/drm/page_pool.c b/drivers/gpu/drm/page_pool.c
> new file mode 100644
> index 000000000000..2139f86e6ca7
> --- /dev/null
> +++ b/drivers/gpu/drm/page_pool.c
> @@ -0,0 +1,220 @@
> +// SPDX-License-Identifier: GPL-2.0

Please use a BSD/MIT compatible license if you want to copy this from
the TTM code.

> +/*
> + * DRM page pool system
> + *
> + * Copyright (C) 2020 Linaro Ltd.
> + *
> + * Based on the ION page pool code
> + * Copyright (C) 2011 Google, Inc.
> + * As well as the ttm_pool code
> + * Copyright (C) 2020 Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/freezer.h>
> +#include <linux/list.h>
> +#include <linux/slab.h>
> +#include <linux/swap.h>
> +#include <linux/sched/signal.h>
> +#include <drm/page_pool.h>
> +
> +static LIST_HEAD(pool_list);
> +static DEFINE_MUTEX(pool_list_lock);
> +static atomic_long_t total_pages;
> +static unsigned long page_pool_max;
> +MODULE_PARM_DESC(page_pool_max, "Number of pages in the WC/UC/DMA pool");
> +module_param(page_pool_max, ulong, 0644);
> +
> +void drm_page_pool_set_max(unsigned long max)
> +{
> + /* only write once */
> + if (!page_pool_max)
> + page_pool_max = max;
> +}
> +
> +unsigned long drm_page_pool_get_max(void)
> +{
> + return page_pool_max;
> +}
> +
> +unsigned long drm_page_pool_get_total(void)
> +{
> + return atomic_long_read(&total_pages);
> +}
> +
> +int drm_page_pool_get_size(struct drm_page_pool *pool)
> +{
> + int ret;
> +
> + spin_lock(&pool->lock);
> + ret = pool->count;
> + spin_unlock(&pool->lock);

Maybe use an atomic for the count instead?

> + return ret;
> +}
> +
> +static inline unsigned int drm_page_pool_free_pages(struct drm_page_pool *pool,
> + struct page *page)
> +{
> + return pool->free(page, pool->order);
> +}
> +
> +static int drm_page_pool_shrink_one(void);
> +
> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> +{
> + spin_lock(&pool->lock);
> + list_add_tail(&page->lru, &pool->items);
> + pool->count++;
> + atomic_long_add(1 << pool->order, &total_pages);
> + spin_unlock(&pool->lock);
> +
> + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
> + 1 << pool->order);

Hui what? What should that be good for?

> +
> + /* make sure we don't grow too large */
> + while (page_pool_max && atomic_long_read(&total_pages) > page_pool_max)
> + drm_page_pool_shrink_one();
> +}
> +EXPORT_SYMBOL_GPL(drm_page_pool_add);
> +
> +static struct page *drm_page_pool_remove(struct drm_page_pool *pool)
> +{
> + struct page *page;
> +
> + if (!pool->count)
> + return NULL;

Better use list_first_entry_or_null instead of checking the count.

This way you can also pull the lock into the function.

> +
> + page = list_first_entry(&pool->items, struct page, lru);
> + pool->count--;
> + atomic_long_sub(1 << pool->order, &total_pages);
> +
> + list_del(&page->lru);
> + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
> + -(1 << pool->order));
> + return page;
> +}
> +
> +struct page *drm_page_pool_fetch(struct drm_page_pool *pool)
> +{
> + struct page *page = NULL;
> +
> + if (!pool) {
> + WARN_ON(!pool);
> + return NULL;
> + }
> +
> + spin_lock(&pool->lock);
> + page = drm_page_pool_remove(pool);
> + spin_unlock(&pool->lock);
> +
> + return page;
> +}
> +EXPORT_SYMBOL_GPL(drm_page_pool_fetch);
> +
> +struct drm_page_pool *drm_page_pool_create(unsigned int order,
> + int (*free_page)(struct page *p, unsigned int order))
> +{
> + struct drm_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL);

Why not making this an embedded object? We should not see much dynamic
pool creation.

> +
> + if (!pool)
> + return NULL;
> +
> + pool->count = 0;
> + INIT_LIST_HEAD(&pool->items);
> + pool->order = order;
> + pool->free = free_page;
> + spin_lock_init(&pool->lock);
> + INIT_LIST_HEAD(&pool->list);
> +
> + mutex_lock(&pool_list_lock);
> + list_add(&pool->list, &pool_list);
> + mutex_unlock(&pool_list_lock);
> +
> + return pool;
> +}
> +EXPORT_SYMBOL_GPL(drm_page_pool_create);
> +
> +void drm_page_pool_destroy(struct drm_page_pool *pool)
> +{
> + struct page *page;
> +
> + /* Remove us from the pool list */
> + mutex_lock(&pool_list_lock);
> + list_del(&pool->list);
> + mutex_unlock(&pool_list_lock);
> +
> + /* Free any remaining pages in the pool */
> + spin_lock(&pool->lock);

Locking should be unnecessary when the pool is destroyed anyway.

> + while (pool->count) {
> + page = drm_page_pool_remove(pool);
> + spin_unlock(&pool->lock);
> + drm_page_pool_free_pages(pool, page);
> + spin_lock(&pool->lock);
> + }
> + spin_unlock(&pool->lock);
> +
> + kfree(pool);
> +}
> +EXPORT_SYMBOL_GPL(drm_page_pool_destroy);
> +
> +static int drm_page_pool_shrink_one(void)
> +{
> + struct drm_page_pool *pool;
> + struct page *page;
> + int nr_freed = 0;
> +
> + mutex_lock(&pool_list_lock);
> + pool = list_first_entry(&pool_list, typeof(*pool), list);
> +
> + spin_lock(&pool->lock);
> + page = drm_page_pool_remove(pool);
> + spin_unlock(&pool->lock);
> +
> + if (page)
> + nr_freed = drm_page_pool_free_pages(pool, page);
> +
> + list_move_tail(&pool->list, &pool_list);

Better to move this up, directly after the list_first_entry().

> + mutex_unlock(&pool_list_lock);
> +
> + return nr_freed;
> +}
> +
> +static unsigned long drm_page_pool_shrink_count(struct shrinker *shrinker,
> + struct shrink_control *sc)
> +{
> + unsigned long count = atomic_long_read(&total_pages);
> +
> + return count ? count : SHRINK_EMPTY;
> +}
> +
> +static unsigned long drm_page_pool_shrink_scan(struct shrinker *shrinker,
> + struct shrink_control *sc)
> +{
> + int to_scan = sc->nr_to_scan;
> + int nr_total = 0;
> +
> + if (to_scan == 0)
> + return 0;
> +
> + do {
> + int nr_freed = drm_page_pool_shrink_one();
> +
> + to_scan -= nr_freed;
> + nr_total += nr_freed;
> + } while (to_scan >= 0 && atomic_long_read(&total_pages));
> +
> + return nr_total;
> +}
> +
> +static struct shrinker pool_shrinker = {
> + .count_objects = drm_page_pool_shrink_count,
> + .scan_objects = drm_page_pool_shrink_scan,
> + .seeks = 1,
> + .batch = 0,
> +};
> +
> +int drm_page_pool_init_shrinker(void)
> +{
> + return register_shrinker(&pool_shrinker);
> +}
> +module_init(drm_page_pool_init_shrinker);
> +MODULE_LICENSE("GPL v2");
> diff --git a/include/drm/page_pool.h b/include/drm/page_pool.h
> new file mode 100644
> index 000000000000..47e240b2bc69
> --- /dev/null
> +++ b/include/drm/page_pool.h
> @@ -0,0 +1,54 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/*
> + * Copyright 2020 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors: Christian König
> + */
> +
> +#ifndef _DRM_PAGE_POOL_H_
> +#define _DRM_PAGE_POOL_H_
> +
> +#include <linux/mmzone.h>
> +#include <linux/llist.h>
> +#include <linux/spinlock.h>
> +
> +struct drm_page_pool {
> + int count;
> + struct list_head items;
> +
> + int order;
> + int (*free)(struct page *p, unsigned int order);
> +
> + spinlock_t lock;
> + struct list_head list;
> +};
> +
> +void drm_page_pool_set_max(unsigned long max);
> +unsigned long drm_page_pool_get_max(void);
> +unsigned long drm_page_pool_get_total(void);
> +int drm_page_pool_get_size(struct drm_page_pool *pool);
> +struct page *drm_page_pool_fetch(struct drm_page_pool *pool);
> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page);
> +struct drm_page_pool *drm_page_pool_create(unsigned int order,
> + int (*free_page)(struct page *p, unsigned int order));
> +void drm_page_pool_destroy(struct drm_page_pool *pool);
> +
> +#endif

2021-02-05 10:45:14

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 0/7] Generic page pool & deferred freeing for system dmabuf heap

Am 05.02.21 um 09:06 schrieb John Stultz:
> This series is starting to get long, so I figured I'd add a
> short cover letter for context.
>
> The point of this series is trying to add both deferred-freeing
> logic as well as a page pool to the DMA-BUF system heap.
>
> This is desired, as the combination of deferred freeing along
> with the page pool allows us to offload page-zeroing out of
> the allocation hot path. This was done originally with ION
> and this patch series allows the DMA-BUF system heap to match
> ION's system heap allocation performance in a simple
> microbenchmark [1] (ION re-added to the kernel for comparision,
> running on an x86 vm image):
>
> ./dmabuf-heap-bench -i 0 1 system
> Testing dmabuf system vs ion heaptype 0 (flags: 0x1)
> ---------------------------------------------
> dmabuf heap: alloc 4096 bytes 5000 times in 86572223 ns 17314 ns/call
> ion heap: alloc 4096 bytes 5000 times in 97442526 ns 19488 ns/call
> dmabuf heap: alloc 1048576 bytes 5000 times in 196635057 ns 39327 ns/call
> ion heap: alloc 1048576 bytes 5000 times in 357323629 ns 71464 ns/call
> dmabuf heap: alloc 8388608 bytes 5000 times in 3165445534 ns 633089 ns/call
> ion heap: alloc 8388608 bytes 5000 times in 3699591271 ns 739918 ns/call
> dmabuf heap: alloc 33554432 bytes 5000 times in 13327402517 ns 2665480 ns/call
> ion heap: alloc 33554432 bytes 5000 times in 15292352796 ns 3058470 ns/call
>
> Daniel didn't like earlier attempts to re-use the network
> page-pool code to achieve this, and suggested the ttm_pool be
> used instead. This required pulling the fairly tightly knit
> ttm_pool logic apart, but after many failed attmempts I think
> I found a workable abstraction to split out shared logic.
>
> So this series contains a new generic drm_page_pool helper
> library, converts the ttm_pool to use it, and then adds the
> dmabuf deferred freeing and adds support to the dmabuf system
> heap to use both deferred freeing and the new drm_page_pool.
>
> Input would be greatly appreciated. Testing as well, as I don't
> have any development hardware that utilizes the ttm pool.

We can easily do the testing and the general idea sounds solid to me.

I see three major things we need to clean up here.
1. The licensing, you are moving from BSD/MIT to GPL2.
2. Don't add any new overhead to the TTM pool, especially allocating a
private object per page is a no-go.
3. What are you doing with the reclaim stuff and why?
4. Keeping the documentation would be nice to have.

Regards,
Christian.

>
> thanks
> -john
>
> [1] https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fandroid.googlesource.com%2Fplatform%2Fsystem%2Fmemory%2Flibdmabufheap%2F%2B%2Frefs%2Fheads%2Fmaster%2Ftests%2Fdmabuf_heap_bench.c&data=04%7C01%7Cchristian.koenig%40amd.com%7C2dc4d6cb3ee246558b9e08d8c9acef9a%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637481091933715561%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=oMgVsrdlwS%2BqZuuatjTiWDzMU9SiUW5eRar5xwT%2BHYQ%3D&reserved=0
>
> Cc: Daniel Vetter <[email protected]>
> Cc: Christian Koenig <[email protected]>
> Cc: Sumit Semwal <[email protected]>
> Cc: Liam Mark <[email protected]>
> Cc: Chris Goldsworthy <[email protected]>
> Cc: Laura Abbott <[email protected]>
> Cc: Brian Starkey <[email protected]>
> Cc: Hridya Valsaraju <[email protected]>
> Cc: Suren Baghdasaryan <[email protected]>
> Cc: Sandeep Patil <[email protected]>
> Cc: Daniel Mentz <[email protected]>
> Cc: Ørjan Eide <[email protected]>
> Cc: Robin Murphy <[email protected]>
> Cc: Ezequiel Garcia <[email protected]>
> Cc: Simon Ser <[email protected]>
> Cc: James Jones <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
>
> John Stultz (7):
> drm: Add a sharable drm page-pool implementation
> drm: ttm_pool: Rename the ttm_pool_dma structure to ttm_pool_page_dat
> drm: ttm_pool: Rework ttm_pool_free_page to allow us to use it as a
> function pointer
> drm: ttm_pool: Rework ttm_pool to use drm_page_pool
> dma-buf: heaps: Add deferred-free-helper library code
> dma-buf: system_heap: Add drm pagepool support to system heap
> dma-buf: system_heap: Add deferred freeing to the system heap
>
> drivers/dma-buf/heaps/Kconfig | 5 +
> drivers/dma-buf/heaps/Makefile | 1 +
> drivers/dma-buf/heaps/deferred-free-helper.c | 145 ++++++++++
> drivers/dma-buf/heaps/deferred-free-helper.h | 55 ++++
> drivers/dma-buf/heaps/system_heap.c | 77 ++++-
> drivers/gpu/drm/Kconfig | 5 +
> drivers/gpu/drm/Makefile | 1 +
> drivers/gpu/drm/page_pool.c | 220 +++++++++++++++
> drivers/gpu/drm/ttm/ttm_pool.c | 278 ++++++-------------
> include/drm/page_pool.h | 54 ++++
> include/drm/ttm/ttm_pool.h | 23 +-
> 11 files changed, 639 insertions(+), 225 deletions(-)
> create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.c
> create mode 100644 drivers/dma-buf/heaps/deferred-free-helper.h
> create mode 100644 drivers/gpu/drm/page_pool.c
> create mode 100644 include/drm/page_pool.h
>

2021-02-05 21:01:12

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Fri, Feb 5, 2021 at 12:47 AM Christian König
<[email protected]> wrote:
> Am 05.02.21 um 09:06 schrieb John Stultz:
> > diff --git a/drivers/gpu/drm/page_pool.c b/drivers/gpu/drm/page_pool.c
> > new file mode 100644
> > index 000000000000..2139f86e6ca7
> > --- /dev/null
> > +++ b/drivers/gpu/drm/page_pool.c
> > @@ -0,0 +1,220 @@
> > +// SPDX-License-Identifier: GPL-2.0
>
> Please use a BSD/MIT compatible license if you want to copy this from
> the TTM code.

Hrm. This may be problematic, as it's not just TTM code, but some of
the TTM logic integrated into a page-pool implementation I wrote based
on logic from the ION code (which was GPL-2.0 before it was dropped).
So I don't think I can just make it MIT. Any extra context on the
need for MIT, or suggestions on how to best resolve this?

> > +int drm_page_pool_get_size(struct drm_page_pool *pool)
> > +{
> > + int ret;
> > +
> > + spin_lock(&pool->lock);
> > + ret = pool->count;
> > + spin_unlock(&pool->lock);
>
> Maybe use an atomic for the count instead?
>

I can do that, but am curious as to the benefit? We are mostly using
count where we already have to take the pool->lock anyway, and this
drm_page_pool_get_size() is only used for debugfs output so far, so I
don't expect it to be a hot path.

> > +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> > +{
> > + spin_lock(&pool->lock);
> > + list_add_tail(&page->lru, &pool->items);
> > + pool->count++;
> > + atomic_long_add(1 << pool->order, &total_pages);
> > + spin_unlock(&pool->lock);
> > +
> > + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
> > + 1 << pool->order);
>
> Hui what? What should that be good for?

This is a carryover from the ION page pool implementation:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/android/ion/ion_page_pool.c?h=v5.10#n28

My sense is it helps with the vmstat/meminfo accounting so folks can
see the cached pages are shrinkable/freeable. This maybe falls under
other dmabuf accounting/stats discussions, so I'm happy to remove it
for now, or let the drivers using the shared page pool logic handle
the accounting themselves?

> > +static struct page *drm_page_pool_remove(struct drm_page_pool *pool)
> > +{
> > + struct page *page;
> > +
> > + if (!pool->count)
> > + return NULL;
>
> Better use list_first_entry_or_null instead of checking the count.
>
> This way you can also pull the lock into the function.

Yea, that cleans a number of things up nicely. Thank you!

> > +struct drm_page_pool *drm_page_pool_create(unsigned int order,
> > + int (*free_page)(struct page *p, unsigned int order))
> > +{
> > + struct drm_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL);
>
> Why not making this an embedded object? We should not see much dynamic
> pool creation.

Yea, it felt cleaner at the time this way, but I think I will need to
switch to an embedded object in order to resolve the memory usage
issue you pointed out with growing the ttm_pool_dma, so thank you for
the suggestion!

> > +void drm_page_pool_destroy(struct drm_page_pool *pool)
> > +{
> > + struct page *page;
> > +
> > + /* Remove us from the pool list */
> > + mutex_lock(&pool_list_lock);
> > + list_del(&pool->list);
> > + mutex_unlock(&pool_list_lock);
> > +
> > + /* Free any remaining pages in the pool */
> > + spin_lock(&pool->lock);
>
> Locking should be unnecessary when the pool is destroyed anyway.

I guess if we've already pruned ourself from the pool list, then your
right, we can't race with the shrinker and it's maybe not necessary.
But it also seems easier to consistently follow the locking rules in a
very unlikely path rather than leaning on subtlety. Either way, I
think this becomes moot if I make the improvements you suggest to
drm_page_pool_remove().

> > +static int drm_page_pool_shrink_one(void)
> > +{
> > + struct drm_page_pool *pool;
> > + struct page *page;
> > + int nr_freed = 0;
> > +
> > + mutex_lock(&pool_list_lock);
> > + pool = list_first_entry(&pool_list, typeof(*pool), list);
> > +
> > + spin_lock(&pool->lock);
> > + page = drm_page_pool_remove(pool);
> > + spin_unlock(&pool->lock);
> > +
> > + if (page)
> > + nr_freed = drm_page_pool_free_pages(pool, page);
> > +
> > + list_move_tail(&pool->list, &pool_list);
>
> Better to move this up, directly after the list_first_entry().

Sounds good!

Thanks so much for your review and feedback! I'll try to get some of
the easy suggestions integrated, and will have to figure out what to
do about the re-licensing request.

thanks
-john

2021-02-05 21:02:42

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 0/7] Generic page pool & deferred freeing for system dmabuf heap

On Fri, Feb 5, 2021 at 2:36 AM Christian König <[email protected]> wrote:
> Am 05.02.21 um 09:06 schrieb John Stultz:
> > Input would be greatly appreciated. Testing as well, as I don't
> > have any development hardware that utilizes the ttm pool.
>
> We can easily do the testing and the general idea sounds solid to me.
>

Thanks so much again for the feedback!

> I see three major things we need to clean up here.
> 1. The licensing, you are moving from BSD/MIT to GPL2.

Yea, this may be sticky, as it's not just code re-used from one dual
licensed file, but combination of GPL2 work, so advice here would be
appreciated.

> 2. Don't add any new overhead to the TTM pool, especially allocating a
> private object per page is a no-go.

This will need some more series rework to solve. I've got some ideas,
but we'll see if they work.

> 3. What are you doing with the reclaim stuff and why?

As I mentioned, it's a holdover from earlier code, and I'm happy to
drop it and defer to other accounting/stats discussions that are
ongoing.

> 4. Keeping the documentation would be nice to have.

True. I didn't spend much time with documentation, as I worried folks
may have disagreed with the whole approach. I'll work to improve it
for the next go around.

Thanks so much again for the review and feedback! I really appreciate
your time here.
-john

2021-02-06 04:19:26

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Fri, Feb 5, 2021 at 12:47 PM John Stultz <[email protected]> wrote:
>
> On Fri, Feb 5, 2021 at 12:47 AM Christian König
> <[email protected]> wrote:
> > Am 05.02.21 um 09:06 schrieb John Stultz:
> > > diff --git a/drivers/gpu/drm/page_pool.c b/drivers/gpu/drm/page_pool.c
> > > new file mode 100644
> > > index 000000000000..2139f86e6ca7
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/page_pool.c
> > > @@ -0,0 +1,220 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> >
> > Please use a BSD/MIT compatible license if you want to copy this from
> > the TTM code.
>
> Hrm. This may be problematic, as it's not just TTM code, but some of
> the TTM logic integrated into a page-pool implementation I wrote based
> on logic from the ION code (which was GPL-2.0 before it was dropped).
> So I don't think I can just make it MIT. Any extra context on the
> need for MIT, or suggestions on how to best resolve this?
>
> > > +int drm_page_pool_get_size(struct drm_page_pool *pool)
> > > +{
> > > + int ret;
> > > +
> > > + spin_lock(&pool->lock);
> > > + ret = pool->count;
> > > + spin_unlock(&pool->lock);
> >
> > Maybe use an atomic for the count instead?
> >
>
> I can do that, but am curious as to the benefit? We are mostly using
> count where we already have to take the pool->lock anyway, and this
> drm_page_pool_get_size() is only used for debugfs output so far, so I
> don't expect it to be a hot path.
>
>
> > > +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> > > +{
> > > + spin_lock(&pool->lock);
> > > + list_add_tail(&page->lru, &pool->items);
> > > + pool->count++;
> > > + atomic_long_add(1 << pool->order, &total_pages);
> > > + spin_unlock(&pool->lock);
> > > +
> > > + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
> > > + 1 << pool->order);
> >
> > Hui what? What should that be good for?
>
> This is a carryover from the ION page pool implementation:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/android/ion/ion_page_pool.c?h=v5.10#n28
>
> My sense is it helps with the vmstat/meminfo accounting so folks can
> see the cached pages are shrinkable/freeable. This maybe falls under
> other dmabuf accounting/stats discussions, so I'm happy to remove it
> for now, or let the drivers using the shared page pool logic handle
> the accounting themselves?

Yep, ION pools were accounted for as reclaimable kernel memory because
they could be dropped when the system is under memory pressure.

>
>
> > > +static struct page *drm_page_pool_remove(struct drm_page_pool *pool)
> > > +{
> > > + struct page *page;
> > > +
> > > + if (!pool->count)
> > > + return NULL;
> >
> > Better use list_first_entry_or_null instead of checking the count.
> >
> > This way you can also pull the lock into the function.
>
> Yea, that cleans a number of things up nicely. Thank you!
>
>
> > > +struct drm_page_pool *drm_page_pool_create(unsigned int order,
> > > + int (*free_page)(struct page *p, unsigned int order))
> > > +{
> > > + struct drm_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL);
> >
> > Why not making this an embedded object? We should not see much dynamic
> > pool creation.
>
> Yea, it felt cleaner at the time this way, but I think I will need to
> switch to an embedded object in order to resolve the memory usage
> issue you pointed out with growing the ttm_pool_dma, so thank you for
> the suggestion!
>
>
> > > +void drm_page_pool_destroy(struct drm_page_pool *pool)
> > > +{
> > > + struct page *page;
> > > +
> > > + /* Remove us from the pool list */
> > > + mutex_lock(&pool_list_lock);
> > > + list_del(&pool->list);
> > > + mutex_unlock(&pool_list_lock);
> > > +
> > > + /* Free any remaining pages in the pool */
> > > + spin_lock(&pool->lock);
> >
> > Locking should be unnecessary when the pool is destroyed anyway.
>
> I guess if we've already pruned ourself from the pool list, then your
> right, we can't race with the shrinker and it's maybe not necessary.
> But it also seems easier to consistently follow the locking rules in a
> very unlikely path rather than leaning on subtlety. Either way, I
> think this becomes moot if I make the improvements you suggest to
> drm_page_pool_remove().
>
> > > +static int drm_page_pool_shrink_one(void)
> > > +{
> > > + struct drm_page_pool *pool;
> > > + struct page *page;
> > > + int nr_freed = 0;
> > > +
> > > + mutex_lock(&pool_list_lock);
> > > + pool = list_first_entry(&pool_list, typeof(*pool), list);
> > > +
> > > + spin_lock(&pool->lock);
> > > + page = drm_page_pool_remove(pool);
> > > + spin_unlock(&pool->lock);
> > > +
> > > + if (page)
> > > + nr_freed = drm_page_pool_free_pages(pool, page);
> > > +
> > > + list_move_tail(&pool->list, &pool_list);
> >
> > Better to move this up, directly after the list_first_entry().
>
> Sounds good!
>
> Thanks so much for your review and feedback! I'll try to get some of
> the easy suggestions integrated, and will have to figure out what to
> do about the re-licensing request.
>
> thanks
> -john

2021-02-09 12:18:31

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

Am 05.02.21 um 21:46 schrieb John Stultz:
> On Fri, Feb 5, 2021 at 12:47 AM Christian König
> <[email protected]> wrote:
>> Am 05.02.21 um 09:06 schrieb John Stultz:
>>> diff --git a/drivers/gpu/drm/page_pool.c b/drivers/gpu/drm/page_pool.c
>>> new file mode 100644
>>> index 000000000000..2139f86e6ca7
>>> --- /dev/null
>>> +++ b/drivers/gpu/drm/page_pool.c
>>> @@ -0,0 +1,220 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>> Please use a BSD/MIT compatible license if you want to copy this from
>> the TTM code.
> Hrm. This may be problematic, as it's not just TTM code, but some of
> the TTM logic integrated into a page-pool implementation I wrote based
> on logic from the ION code (which was GPL-2.0 before it was dropped).
> So I don't think I can just make it MIT. Any extra context on the
> need for MIT, or suggestions on how to best resolve this?

Most of the DRM/TTM code is also license able under the BSD/MIT style
license since we want to enable the BSD guys to port it over as well.

What stuff do you need from the ION code that you can't just code
yourself? As far as I have seen this is like 99% code from the TTM pool.

Christian.

>
>>> +int drm_page_pool_get_size(struct drm_page_pool *pool)
>>> +{
>>> + int ret;
>>> +
>>> + spin_lock(&pool->lock);
>>> + ret = pool->count;
>>> + spin_unlock(&pool->lock);
>> Maybe use an atomic for the count instead?
>>
> I can do that, but am curious as to the benefit? We are mostly using
> count where we already have to take the pool->lock anyway, and this
> drm_page_pool_get_size() is only used for debugfs output so far, so I
> don't expect it to be a hot path.

Yeah, it's not really important. IIRC I even walked over the linked list
to count how many pages we had.

Christian.

>
>
>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
>>> +{
>>> + spin_lock(&pool->lock);
>>> + list_add_tail(&page->lru, &pool->items);
>>> + pool->count++;
>>> + atomic_long_add(1 << pool->order, &total_pages);
>>> + spin_unlock(&pool->lock);
>>> +
>>> + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
>>> + 1 << pool->order);
>> Hui what? What should that be good for?
> This is a carryover from the ION page pool implementation:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cc4eadb0a9cf6491d99ba08d8ca173457%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637481548325174885%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=FUjZK5NSDMUYfU7vGeE4fDU2HCF%2FYyNBwc30aoLLPQ4%3D&reserved=0
>
> My sense is it helps with the vmstat/meminfo accounting so folks can
> see the cached pages are shrinkable/freeable. This maybe falls under
> other dmabuf accounting/stats discussions, so I'm happy to remove it
> for now, or let the drivers using the shared page pool logic handle
> the accounting themselves?
>
>
>>> +static struct page *drm_page_pool_remove(struct drm_page_pool *pool)
>>> +{
>>> + struct page *page;
>>> +
>>> + if (!pool->count)
>>> + return NULL;
>> Better use list_first_entry_or_null instead of checking the count.
>>
>> This way you can also pull the lock into the function.
> Yea, that cleans a number of things up nicely. Thank you!
>
>
>>> +struct drm_page_pool *drm_page_pool_create(unsigned int order,
>>> + int (*free_page)(struct page *p, unsigned int order))
>>> +{
>>> + struct drm_page_pool *pool = kmalloc(sizeof(*pool), GFP_KERNEL);
>> Why not making this an embedded object? We should not see much dynamic
>> pool creation.
> Yea, it felt cleaner at the time this way, but I think I will need to
> switch to an embedded object in order to resolve the memory usage
> issue you pointed out with growing the ttm_pool_dma, so thank you for
> the suggestion!
>
>
>>> +void drm_page_pool_destroy(struct drm_page_pool *pool)
>>> +{
>>> + struct page *page;
>>> +
>>> + /* Remove us from the pool list */
>>> + mutex_lock(&pool_list_lock);
>>> + list_del(&pool->list);
>>> + mutex_unlock(&pool_list_lock);
>>> +
>>> + /* Free any remaining pages in the pool */
>>> + spin_lock(&pool->lock);
>> Locking should be unnecessary when the pool is destroyed anyway.
> I guess if we've already pruned ourself from the pool list, then your
> right, we can't race with the shrinker and it's maybe not necessary.
> But it also seems easier to consistently follow the locking rules in a
> very unlikely path rather than leaning on subtlety. Either way, I
> think this becomes moot if I make the improvements you suggest to
> drm_page_pool_remove().
>
>>> +static int drm_page_pool_shrink_one(void)
>>> +{
>>> + struct drm_page_pool *pool;
>>> + struct page *page;
>>> + int nr_freed = 0;
>>> +
>>> + mutex_lock(&pool_list_lock);
>>> + pool = list_first_entry(&pool_list, typeof(*pool), list);
>>> +
>>> + spin_lock(&pool->lock);
>>> + page = drm_page_pool_remove(pool);
>>> + spin_unlock(&pool->lock);
>>> +
>>> + if (page)
>>> + nr_freed = drm_page_pool_free_pages(pool, page);
>>> +
>>> + list_move_tail(&pool->list, &pool_list);
>> Better to move this up, directly after the list_first_entry().
> Sounds good!
>
> Thanks so much for your review and feedback! I'll try to get some of
> the easy suggestions integrated, and will have to figure out what to
> do about the re-licensing request.
>
> thanks
> -john

2021-02-09 13:03:29

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

Am 09.02.21 um 13:11 schrieb Christian König:
> [SNIP]
>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
>>>> +{
>>>> +     spin_lock(&pool->lock);
>>>> +     list_add_tail(&page->lru, &pool->items);
>>>> +     pool->count++;
>>>> +     atomic_long_add(1 << pool->order, &total_pages);
>>>> +     spin_unlock(&pool->lock);
>>>> +
>>>> +     mod_node_page_state(page_pgdat(page),
>>>> NR_KERNEL_MISC_RECLAIMABLE,
>>>> +                         1 << pool->order);
>>> Hui what? What should that be good for?
>> This is a carryover from the ION page pool implementation:
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cc4eadb0a9cf6491d99ba08d8ca173457%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637481548325174885%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=FUjZK5NSDMUYfU7vGeE4fDU2HCF%2FYyNBwc30aoLLPQ4%3D&reserved=0
>>
>>
>> My sense is it helps with the vmstat/meminfo accounting so folks can
>> see the cached pages are shrinkable/freeable. This maybe falls under
>> other dmabuf accounting/stats discussions, so I'm happy to remove it
>> for now, or let the drivers using the shared page pool logic handle
>> the accounting themselves?

Intentionally separated the discussion for that here.

As far as I can see this is just bluntly incorrect.

Either the page is reclaimable or it is part of our pool and freeable
through the shrinker, but never ever both.

In the best case this just messes up the accounting, in the worst case
it can cause memory corruption.

Christian.

2021-02-09 17:38:43

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
>
> Am 09.02.21 um 13:11 schrieb Christian König:
> > [SNIP]
> >>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> >>>> +{
> >>>> + spin_lock(&pool->lock);
> >>>> + list_add_tail(&page->lru, &pool->items);
> >>>> + pool->count++;
> >>>> + atomic_long_add(1 << pool->order, &total_pages);
> >>>> + spin_unlock(&pool->lock);
> >>>> +
> >>>> + mod_node_page_state(page_pgdat(page),
> >>>> NR_KERNEL_MISC_RECLAIMABLE,
> >>>> + 1 << pool->order);
> >>> Hui what? What should that be good for?
> >> This is a carryover from the ION page pool implementation:
> >> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cc4eadb0a9cf6491d99ba08d8ca173457%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637481548325174885%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=FUjZK5NSDMUYfU7vGeE4fDU2HCF%2FYyNBwc30aoLLPQ4%3D&reserved=0
> >>
> >>
> >> My sense is it helps with the vmstat/meminfo accounting so folks can
> >> see the cached pages are shrinkable/freeable. This maybe falls under
> >> other dmabuf accounting/stats discussions, so I'm happy to remove it
> >> for now, or let the drivers using the shared page pool logic handle
> >> the accounting themselves?
>
> Intentionally separated the discussion for that here.
>
> As far as I can see this is just bluntly incorrect.
>
> Either the page is reclaimable or it is part of our pool and freeable
> through the shrinker, but never ever both.

IIRC the original motivation for counting ION pooled pages as
reclaimable was to include them into /proc/meminfo's MemAvailable
calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
non-slab kernel pages" seems like a good place to account for them but
I might be wrong.

>
> In the best case this just messes up the accounting, in the worst case
> it can cause memory corruption.
>
> Christian.

2021-02-09 19:06:44

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Tue, Feb 9, 2021 at 4:11 AM Christian König <[email protected]> wrote:
>
>
>
> Am 05.02.21 um 21:46 schrieb John Stultz:
> > On Fri, Feb 5, 2021 at 12:47 AM Christian König
> > <[email protected]> wrote:
> >> Am 05.02.21 um 09:06 schrieb John Stultz:
> >>> diff --git a/drivers/gpu/drm/page_pool.c b/drivers/gpu/drm/page_pool.c
> >>> new file mode 100644
> >>> index 000000000000..2139f86e6ca7
> >>> --- /dev/null
> >>> +++ b/drivers/gpu/drm/page_pool.c
> >>> @@ -0,0 +1,220 @@
> >>> +// SPDX-License-Identifier: GPL-2.0
> >> Please use a BSD/MIT compatible license if you want to copy this from
> >> the TTM code.
> > Hrm. This may be problematic, as it's not just TTM code, but some of
> > the TTM logic integrated into a page-pool implementation I wrote based
> > on logic from the ION code (which was GPL-2.0 before it was dropped).
> > So I don't think I can just make it MIT. Any extra context on the
> > need for MIT, or suggestions on how to best resolve this?
>
> Most of the DRM/TTM code is also license able under the BSD/MIT style
> license since we want to enable the BSD guys to port it over as well.
>
> What stuff do you need from the ION code that you can't just code
> yourself? As far as I have seen this is like 99% code from the TTM pool.

Yea, it evolved into being mostly logic from the TTM pool (or code
that was very similar to begin with), but it's not where it started.
My old days at IBM makes me wary of claiming it's no longer the Ship
of Theseus.

So instead I think I'll have to just throw out my patch and rewrite it
in full (so apologies in advance for any review issues I
introduce/reintroduce).

thanks
-john

2021-02-09 19:31:02

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Tue, Feb 9, 2021 at 9:46 AM Christian König <[email protected]> wrote:
>
>
>
> Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> > On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
> >> Am 09.02.21 um 13:11 schrieb Christian König:
> >>> [SNIP]
> >>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> >>>>>> +{
> >>>>>> + spin_lock(&pool->lock);
> >>>>>> + list_add_tail(&page->lru, &pool->items);
> >>>>>> + pool->count++;
> >>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> >>>>>> + spin_unlock(&pool->lock);
> >>>>>> +
> >>>>>> + mod_node_page_state(page_pgdat(page),
> >>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> >>>>>> + 1 << pool->order);
> >>>>> Hui what? What should that be good for?
> >>>> This is a carryover from the ION page pool implementation:
> >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
> >>>>
> >>>>
> >>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> >>>> see the cached pages are shrinkable/freeable. This maybe falls under
> >>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> >>>> for now, or let the drivers using the shared page pool logic handle
> >>>> the accounting themselves?
> >> Intentionally separated the discussion for that here.
> >>
> >> As far as I can see this is just bluntly incorrect.
> >>
> >> Either the page is reclaimable or it is part of our pool and freeable
> >> through the shrinker, but never ever both.
> > IIRC the original motivation for counting ION pooled pages as
> > reclaimable was to include them into /proc/meminfo's MemAvailable
> > calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> > non-slab kernel pages" seems like a good place to account for them but
> > I might be wrong.
>
> Yeah, that's what I see here as well. But exactly that is utterly nonsense.
>
> Those pages are not "free" in the sense that get_free_page could return
> them directly.

Any ideas where these pages would fit better? We do want to know that
under memory pressure these pages can be made available (which is I
think what MemAvailable means).

>
> Regards,
> Christian.
>
> >
> >> In the best case this just messes up the accounting, in the worst case
> >> it can cause memory corruption.
> >>
> >> Christian.
>

2021-02-10 00:12:59

by Daniel Vetter

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Tue, Feb 9, 2021 at 6:46 PM Christian König <[email protected]> wrote:
>
>
>
> Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> > On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
> >> Am 09.02.21 um 13:11 schrieb Christian König:
> >>> [SNIP]
> >>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> >>>>>> +{
> >>>>>> + spin_lock(&pool->lock);
> >>>>>> + list_add_tail(&page->lru, &pool->items);
> >>>>>> + pool->count++;
> >>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> >>>>>> + spin_unlock(&pool->lock);
> >>>>>> +
> >>>>>> + mod_node_page_state(page_pgdat(page),
> >>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> >>>>>> + 1 << pool->order);
> >>>>> Hui what? What should that be good for?
> >>>> This is a carryover from the ION page pool implementation:
> >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
> >>>>
> >>>>
> >>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> >>>> see the cached pages are shrinkable/freeable. This maybe falls under
> >>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> >>>> for now, or let the drivers using the shared page pool logic handle
> >>>> the accounting themselves?
> >> Intentionally separated the discussion for that here.
> >>
> >> As far as I can see this is just bluntly incorrect.
> >>
> >> Either the page is reclaimable or it is part of our pool and freeable
> >> through the shrinker, but never ever both.
> > IIRC the original motivation for counting ION pooled pages as
> > reclaimable was to include them into /proc/meminfo's MemAvailable
> > calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> > non-slab kernel pages" seems like a good place to account for them but
> > I might be wrong.
>
> Yeah, that's what I see here as well. But exactly that is utterly nonsense.
>
> Those pages are not "free" in the sense that get_free_page could return
> them directly.

Well on Android that is kinda true, because Android has it's
oom-killer (way back it was just a shrinker callback, not sure how it
works now), which just shot down all the background apps. So at least
some of that (everything used by background apps) is indeed
reclaimable on Android.

But that doesn't hold on Linux in general, so we can't really do this
for common code.

Also I had a long meeting with Suren, John and other googles
yesterday, and the aim is now to try and support all the Android gpu
memory accounting needs with cgroups. That should work, and it will
allow Android to handle all the Android-ism in a clean way in upstream
code. Or that's at least the plan.

I think the only thing we identified that Android still needs on top
is the dma-buf sysfs stuff, so that shared buffers (which on Android
are always dma-buf, and always stay around as dma-buf fd throughout
their lifetime) can be listed/analyzed with full detail.

But aside from this the plan for all the per-process or per-heap
account, oom-killer integration and everything else is planned to be
done with cgroups. Android (for now) only needs to account overall gpu
memory since none of it is swappable on android drivers anyway, plus
no vram, so not much needed.

Cheers, Daniel

>
> Regards,
> Christian.
>
> >
> >> In the best case this just messes up the accounting, in the worst case
> >> it can cause memory corruption.
> >>
> >> Christian.
>

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2021-02-10 07:49:44

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
>> Am 09.02.21 um 13:11 schrieb Christian König:
>>> [SNIP]
>>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
>>>>>> +{
>>>>>> + spin_lock(&pool->lock);
>>>>>> + list_add_tail(&page->lru, &pool->items);
>>>>>> + pool->count++;
>>>>>> + atomic_long_add(1 << pool->order, &total_pages);
>>>>>> + spin_unlock(&pool->lock);
>>>>>> +
>>>>>> + mod_node_page_state(page_pgdat(page),
>>>>>> NR_KERNEL_MISC_RECLAIMABLE,
>>>>>> + 1 << pool->order);
>>>>> Hui what? What should that be good for?
>>>> This is a carryover from the ION page pool implementation:
>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
>>>>
>>>>
>>>> My sense is it helps with the vmstat/meminfo accounting so folks can
>>>> see the cached pages are shrinkable/freeable. This maybe falls under
>>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
>>>> for now, or let the drivers using the shared page pool logic handle
>>>> the accounting themselves?
>> Intentionally separated the discussion for that here.
>>
>> As far as I can see this is just bluntly incorrect.
>>
>> Either the page is reclaimable or it is part of our pool and freeable
>> through the shrinker, but never ever both.
> IIRC the original motivation for counting ION pooled pages as
> reclaimable was to include them into /proc/meminfo's MemAvailable
> calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> non-slab kernel pages" seems like a good place to account for them but
> I might be wrong.

Yeah, that's what I see here as well. But exactly that is utterly nonsense.

Those pages are not "free" in the sense that get_free_page could return
them directly.

Regards,
Christian.

>
>> In the best case this just messes up the accounting, in the worst case
>> it can cause memory corruption.
>>
>> Christian.

2021-02-10 08:11:50

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Tue, Feb 9, 2021 at 12:03 PM Daniel Vetter <[email protected]> wrote:
>
> On Tue, Feb 9, 2021 at 6:46 PM Christian König <[email protected]> wrote:
> >
> >
> >
> > Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> > > On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
> > >> Am 09.02.21 um 13:11 schrieb Christian König:
> > >>> [SNIP]
> > >>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> > >>>>>> +{
> > >>>>>> + spin_lock(&pool->lock);
> > >>>>>> + list_add_tail(&page->lru, &pool->items);
> > >>>>>> + pool->count++;
> > >>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> > >>>>>> + spin_unlock(&pool->lock);
> > >>>>>> +
> > >>>>>> + mod_node_page_state(page_pgdat(page),
> > >>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> > >>>>>> + 1 << pool->order);
> > >>>>> Hui what? What should that be good for?
> > >>>> This is a carryover from the ION page pool implementation:
> > >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
> > >>>>
> > >>>>
> > >>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> > >>>> see the cached pages are shrinkable/freeable. This maybe falls under
> > >>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> > >>>> for now, or let the drivers using the shared page pool logic handle
> > >>>> the accounting themselves?
> > >> Intentionally separated the discussion for that here.
> > >>
> > >> As far as I can see this is just bluntly incorrect.
> > >>
> > >> Either the page is reclaimable or it is part of our pool and freeable
> > >> through the shrinker, but never ever both.
> > > IIRC the original motivation for counting ION pooled pages as
> > > reclaimable was to include them into /proc/meminfo's MemAvailable
> > > calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> > > non-slab kernel pages" seems like a good place to account for them but
> > > I might be wrong.
> >
> > Yeah, that's what I see here as well. But exactly that is utterly nonsense.
> >
> > Those pages are not "free" in the sense that get_free_page could return
> > them directly.
>
> Well on Android that is kinda true, because Android has it's
> oom-killer (way back it was just a shrinker callback, not sure how it
> works now), which just shot down all the background apps. So at least
> some of that (everything used by background apps) is indeed
> reclaimable on Android.
>
> But that doesn't hold on Linux in general, so we can't really do this
> for common code.
>
> Also I had a long meeting with Suren, John and other googles
> yesterday, and the aim is now to try and support all the Android gpu
> memory accounting needs with cgroups. That should work, and it will
> allow Android to handle all the Android-ism in a clean way in upstream
> code. Or that's at least the plan.
>
> I think the only thing we identified that Android still needs on top
> is the dma-buf sysfs stuff, so that shared buffers (which on Android
> are always dma-buf, and always stay around as dma-buf fd throughout
> their lifetime) can be listed/analyzed with full detail.
>
> But aside from this the plan for all the per-process or per-heap
> account, oom-killer integration and everything else is planned to be
> done with cgroups.

Until cgroups are ready we probably will need to add a sysfs node to
report the total dmabuf pool size and I think that would cover our
current accounting need here.
As I mentioned, not including dmabuf pools into MemAvailable would
affect that stat and I'm wondering if pools should be considered as
part of MemAvailable or not. Since MemAvailable includes SReclaimable
I think it makes sense to include them but maybe there are other
considerations that I'm missing?

> Android (for now) only needs to account overall gpu
> memory since none of it is swappable on android drivers anyway, plus
> no vram, so not much needed.
>
> Cheers, Daniel
>
> >
> > Regards,
> > Christian.
> >
> > >
> > >> In the best case this just messes up the accounting, in the worst case
> > >> it can cause memory corruption.
> > >>
> > >> Christian.
> >
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

2021-02-10 13:10:01

by Daniel Vetter

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Tue, Feb 09, 2021 at 12:16:51PM -0800, Suren Baghdasaryan wrote:
> On Tue, Feb 9, 2021 at 12:03 PM Daniel Vetter <[email protected]> wrote:
> >
> > On Tue, Feb 9, 2021 at 6:46 PM Christian K?nig <[email protected]> wrote:
> > >
> > >
> > >
> > > Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> > > > On Tue, Feb 9, 2021 at 4:57 AM Christian K?nig <[email protected]> wrote:
> > > >> Am 09.02.21 um 13:11 schrieb Christian K?nig:
> > > >>> [SNIP]
> > > >>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> > > >>>>>> +{
> > > >>>>>> + spin_lock(&pool->lock);
> > > >>>>>> + list_add_tail(&page->lru, &pool->items);
> > > >>>>>> + pool->count++;
> > > >>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> > > >>>>>> + spin_unlock(&pool->lock);
> > > >>>>>> +
> > > >>>>>> + mod_node_page_state(page_pgdat(page),
> > > >>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> > > >>>>>> + 1 << pool->order);
> > > >>>>> Hui what? What should that be good for?
> > > >>>> This is a carryover from the ION page pool implementation:
> > > >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
> > > >>>>
> > > >>>>
> > > >>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> > > >>>> see the cached pages are shrinkable/freeable. This maybe falls under
> > > >>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> > > >>>> for now, or let the drivers using the shared page pool logic handle
> > > >>>> the accounting themselves?
> > > >> Intentionally separated the discussion for that here.
> > > >>
> > > >> As far as I can see this is just bluntly incorrect.
> > > >>
> > > >> Either the page is reclaimable or it is part of our pool and freeable
> > > >> through the shrinker, but never ever both.
> > > > IIRC the original motivation for counting ION pooled pages as
> > > > reclaimable was to include them into /proc/meminfo's MemAvailable
> > > > calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> > > > non-slab kernel pages" seems like a good place to account for them but
> > > > I might be wrong.
> > >
> > > Yeah, that's what I see here as well. But exactly that is utterly nonsense.
> > >
> > > Those pages are not "free" in the sense that get_free_page could return
> > > them directly.
> >
> > Well on Android that is kinda true, because Android has it's
> > oom-killer (way back it was just a shrinker callback, not sure how it
> > works now), which just shot down all the background apps. So at least
> > some of that (everything used by background apps) is indeed
> > reclaimable on Android.
> >
> > But that doesn't hold on Linux in general, so we can't really do this
> > for common code.
> >
> > Also I had a long meeting with Suren, John and other googles
> > yesterday, and the aim is now to try and support all the Android gpu
> > memory accounting needs with cgroups. That should work, and it will
> > allow Android to handle all the Android-ism in a clean way in upstream
> > code. Or that's at least the plan.
> >
> > I think the only thing we identified that Android still needs on top
> > is the dma-buf sysfs stuff, so that shared buffers (which on Android
> > are always dma-buf, and always stay around as dma-buf fd throughout
> > their lifetime) can be listed/analyzed with full detail.
> >
> > But aside from this the plan for all the per-process or per-heap
> > account, oom-killer integration and everything else is planned to be
> > done with cgroups.
>
> Until cgroups are ready we probably will need to add a sysfs node to
> report the total dmabuf pool size and I think that would cover our
> current accounting need here.
> As I mentioned, not including dmabuf pools into MemAvailable would
> affect that stat and I'm wondering if pools should be considered as
> part of MemAvailable or not. Since MemAvailable includes SReclaimable
> I think it makes sense to include them but maybe there are other
> considerations that I'm missing?

On Android, yes, on upstream, not so much. Because upstream doesn't have
the android low memory killer cleanup up all the apps, so effectively we
can't reclaim that memory, and we shouldn't report it as such.
-Daniel

>
> > Android (for now) only needs to account overall gpu
> > memory since none of it is swappable on android drivers anyway, plus
> > no vram, so not much needed.
> >
> > Cheers, Daniel
> >
> > >
> > > Regards,
> > > Christian.
> > >
> > > >
> > > >> In the best case this just messes up the accounting, in the worst case
> > > >> it can cause memory corruption.
> > > >>
> > > >> Christian.
> > >
> >
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2021-02-10 16:45:16

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Wed, Feb 10, 2021 at 5:06 AM Daniel Vetter <[email protected]> wrote:
>
> On Tue, Feb 09, 2021 at 12:16:51PM -0800, Suren Baghdasaryan wrote:
> > On Tue, Feb 9, 2021 at 12:03 PM Daniel Vetter <[email protected]> wrote:
> > >
> > > On Tue, Feb 9, 2021 at 6:46 PM Christian König <[email protected]> wrote:
> > > >
> > > >
> > > >
> > > > Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> > > > > On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
> > > > >> Am 09.02.21 um 13:11 schrieb Christian König:
> > > > >>> [SNIP]
> > > > >>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> > > > >>>>>> +{
> > > > >>>>>> + spin_lock(&pool->lock);
> > > > >>>>>> + list_add_tail(&page->lru, &pool->items);
> > > > >>>>>> + pool->count++;
> > > > >>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> > > > >>>>>> + spin_unlock(&pool->lock);
> > > > >>>>>> +
> > > > >>>>>> + mod_node_page_state(page_pgdat(page),
> > > > >>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> > > > >>>>>> + 1 << pool->order);
> > > > >>>>> Hui what? What should that be good for?
> > > > >>>> This is a carryover from the ION page pool implementation:
> > > > >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
> > > > >>>>
> > > > >>>>
> > > > >>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> > > > >>>> see the cached pages are shrinkable/freeable. This maybe falls under
> > > > >>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> > > > >>>> for now, or let the drivers using the shared page pool logic handle
> > > > >>>> the accounting themselves?
> > > > >> Intentionally separated the discussion for that here.
> > > > >>
> > > > >> As far as I can see this is just bluntly incorrect.
> > > > >>
> > > > >> Either the page is reclaimable or it is part of our pool and freeable
> > > > >> through the shrinker, but never ever both.
> > > > > IIRC the original motivation for counting ION pooled pages as
> > > > > reclaimable was to include them into /proc/meminfo's MemAvailable
> > > > > calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> > > > > non-slab kernel pages" seems like a good place to account for them but
> > > > > I might be wrong.
> > > >
> > > > Yeah, that's what I see here as well. But exactly that is utterly nonsense.
> > > >
> > > > Those pages are not "free" in the sense that get_free_page could return
> > > > them directly.
> > >
> > > Well on Android that is kinda true, because Android has it's
> > > oom-killer (way back it was just a shrinker callback, not sure how it
> > > works now), which just shot down all the background apps. So at least
> > > some of that (everything used by background apps) is indeed
> > > reclaimable on Android.
> > >
> > > But that doesn't hold on Linux in general, so we can't really do this
> > > for common code.
> > >
> > > Also I had a long meeting with Suren, John and other googles
> > > yesterday, and the aim is now to try and support all the Android gpu
> > > memory accounting needs with cgroups. That should work, and it will
> > > allow Android to handle all the Android-ism in a clean way in upstream
> > > code. Or that's at least the plan.
> > >
> > > I think the only thing we identified that Android still needs on top
> > > is the dma-buf sysfs stuff, so that shared buffers (which on Android
> > > are always dma-buf, and always stay around as dma-buf fd throughout
> > > their lifetime) can be listed/analyzed with full detail.
> > >
> > > But aside from this the plan for all the per-process or per-heap
> > > account, oom-killer integration and everything else is planned to be
> > > done with cgroups.
> >
> > Until cgroups are ready we probably will need to add a sysfs node to
> > report the total dmabuf pool size and I think that would cover our
> > current accounting need here.
> > As I mentioned, not including dmabuf pools into MemAvailable would
> > affect that stat and I'm wondering if pools should be considered as
> > part of MemAvailable or not. Since MemAvailable includes SReclaimable
> > I think it makes sense to include them but maybe there are other
> > considerations that I'm missing?
>
> On Android, yes, on upstream, not so much. Because upstream doesn't have
> the android low memory killer cleanup up all the apps, so effectively we
> can't reclaim that memory, and we shouldn't report it as such.
> -Daniel

Hmm. Sorry, I fail to see why Android's low memory killer makes a
difference here. In my mind, the pages in the pools are not used but
kept there in case heaps need them (maybe that's the part I'm wrong?).
These pages can be freed by the shrinker if memory pressure rises. In
that sense I think it's very similar to reclaimable slabs which are
already accounted as part of MemAvailable. So it seems logical to me
to include unused pages in the pools here as well. What am I missing?

>
> >
> > > Android (for now) only needs to account overall gpu
> > > memory since none of it is swappable on android drivers anyway, plus
> > > no vram, so not much needed.
> > >
> > > Cheers, Daniel
> > >
> > > >
> > > > Regards,
> > > > Christian.
> > > >
> > > > >
> > > > >> In the best case this just messes up the accounting, in the worst case
> > > > >> it can cause memory corruption.
> > > > >>
> > > > >> Christian.
> > > >
> > >
> > >
> > > --
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

2021-02-10 17:27:19

by Daniel Vetter

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Wed, Feb 10, 2021 at 5:39 PM Suren Baghdasaryan <[email protected]> wrote:
>
> On Wed, Feb 10, 2021 at 5:06 AM Daniel Vetter <[email protected]> wrote:
> >
> > On Tue, Feb 09, 2021 at 12:16:51PM -0800, Suren Baghdasaryan wrote:
> > > On Tue, Feb 9, 2021 at 12:03 PM Daniel Vetter <[email protected]> wrote:
> > > >
> > > > On Tue, Feb 9, 2021 at 6:46 PM Christian König <[email protected]> wrote:
> > > > >
> > > > >
> > > > >
> > > > > Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> > > > > > On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
> > > > > >> Am 09.02.21 um 13:11 schrieb Christian König:
> > > > > >>> [SNIP]
> > > > > >>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> > > > > >>>>>> +{
> > > > > >>>>>> + spin_lock(&pool->lock);
> > > > > >>>>>> + list_add_tail(&page->lru, &pool->items);
> > > > > >>>>>> + pool->count++;
> > > > > >>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> > > > > >>>>>> + spin_unlock(&pool->lock);
> > > > > >>>>>> +
> > > > > >>>>>> + mod_node_page_state(page_pgdat(page),
> > > > > >>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> > > > > >>>>>> + 1 << pool->order);
> > > > > >>>>> Hui what? What should that be good for?
> > > > > >>>> This is a carryover from the ION page pool implementation:
> > > > > >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
> > > > > >>>>
> > > > > >>>>
> > > > > >>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> > > > > >>>> see the cached pages are shrinkable/freeable. This maybe falls under
> > > > > >>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> > > > > >>>> for now, or let the drivers using the shared page pool logic handle
> > > > > >>>> the accounting themselves?
> > > > > >> Intentionally separated the discussion for that here.
> > > > > >>
> > > > > >> As far as I can see this is just bluntly incorrect.
> > > > > >>
> > > > > >> Either the page is reclaimable or it is part of our pool and freeable
> > > > > >> through the shrinker, but never ever both.
> > > > > > IIRC the original motivation for counting ION pooled pages as
> > > > > > reclaimable was to include them into /proc/meminfo's MemAvailable
> > > > > > calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> > > > > > non-slab kernel pages" seems like a good place to account for them but
> > > > > > I might be wrong.
> > > > >
> > > > > Yeah, that's what I see here as well. But exactly that is utterly nonsense.
> > > > >
> > > > > Those pages are not "free" in the sense that get_free_page could return
> > > > > them directly.
> > > >
> > > > Well on Android that is kinda true, because Android has it's
> > > > oom-killer (way back it was just a shrinker callback, not sure how it
> > > > works now), which just shot down all the background apps. So at least
> > > > some of that (everything used by background apps) is indeed
> > > > reclaimable on Android.
> > > >
> > > > But that doesn't hold on Linux in general, so we can't really do this
> > > > for common code.
> > > >
> > > > Also I had a long meeting with Suren, John and other googles
> > > > yesterday, and the aim is now to try and support all the Android gpu
> > > > memory accounting needs with cgroups. That should work, and it will
> > > > allow Android to handle all the Android-ism in a clean way in upstream
> > > > code. Or that's at least the plan.
> > > >
> > > > I think the only thing we identified that Android still needs on top
> > > > is the dma-buf sysfs stuff, so that shared buffers (which on Android
> > > > are always dma-buf, and always stay around as dma-buf fd throughout
> > > > their lifetime) can be listed/analyzed with full detail.
> > > >
> > > > But aside from this the plan for all the per-process or per-heap
> > > > account, oom-killer integration and everything else is planned to be
> > > > done with cgroups.
> > >
> > > Until cgroups are ready we probably will need to add a sysfs node to
> > > report the total dmabuf pool size and I think that would cover our
> > > current accounting need here.
> > > As I mentioned, not including dmabuf pools into MemAvailable would
> > > affect that stat and I'm wondering if pools should be considered as
> > > part of MemAvailable or not. Since MemAvailable includes SReclaimable
> > > I think it makes sense to include them but maybe there are other
> > > considerations that I'm missing?
> >
> > On Android, yes, on upstream, not so much. Because upstream doesn't have
> > the android low memory killer cleanup up all the apps, so effectively we
> > can't reclaim that memory, and we shouldn't report it as such.
> > -Daniel
>
> Hmm. Sorry, I fail to see why Android's low memory killer makes a
> difference here. In my mind, the pages in the pools are not used but
> kept there in case heaps need them (maybe that's the part I'm wrong?).
> These pages can be freed by the shrinker if memory pressure rises. In
> that sense I think it's very similar to reclaimable slabs which are
> already accounted as part of MemAvailable. So it seems logical to me
> to include unused pages in the pools here as well. What am I missing?

Ah yes, those page pool pages we can list. But conceptually (at least
in the internals) they're shrunk through mm shrinker callbacks, like
slab cache memory. So not exactly sure where to list that.

Since we have the same pools for gpu allocations on the ttm side and
John is looking into unifying those, maybe we could add that as a
patch on top? For some nice consistency across all gpu drivers from
android to discrete. I think if you, John and Christian from ttm side
can figure out how these page pools should be reported we'll have
something that fits? Maybe John can ping you on the other thread with
the shared pool rfc between ttm and dma-buf heaps (there's so much
going right now all over I'm a bit lost).

Cheers, Daniel

>
> >
> > >
> > > > Android (for now) only needs to account overall gpu
> > > > memory since none of it is swappable on android drivers anyway, plus
> > > > no vram, so not much needed.
> > > >
> > > > Cheers, Daniel
> > > >
> > > > >
> > > > > Regards,
> > > > > Christian.
> > > > >
> > > > > >
> > > > > >> In the best case this just messes up the accounting, in the worst case
> > > > > >> it can cause memory corruption.
> > > > > >>
> > > > > >> Christian.
> > > > >
> > > >
> > > >
> > > > --
> > > > Daniel Vetter
> > > > Software Engineer, Intel Corporation
> > > > http://blog.ffwll.ch
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2021-02-10 17:30:19

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Wed, Feb 10, 2021 at 9:21 AM Daniel Vetter <[email protected]> wrote:
>
> On Wed, Feb 10, 2021 at 5:39 PM Suren Baghdasaryan <[email protected]> wrote:
> >
> > On Wed, Feb 10, 2021 at 5:06 AM Daniel Vetter <[email protected]> wrote:
> > >
> > > On Tue, Feb 09, 2021 at 12:16:51PM -0800, Suren Baghdasaryan wrote:
> > > > On Tue, Feb 9, 2021 at 12:03 PM Daniel Vetter <[email protected]> wrote:
> > > > >
> > > > > On Tue, Feb 9, 2021 at 6:46 PM Christian König <[email protected]> wrote:
> > > > > >
> > > > > >
> > > > > >
> > > > > > Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> > > > > > > On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
> > > > > > >> Am 09.02.21 um 13:11 schrieb Christian König:
> > > > > > >>> [SNIP]
> > > > > > >>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> > > > > > >>>>>> +{
> > > > > > >>>>>> + spin_lock(&pool->lock);
> > > > > > >>>>>> + list_add_tail(&page->lru, &pool->items);
> > > > > > >>>>>> + pool->count++;
> > > > > > >>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> > > > > > >>>>>> + spin_unlock(&pool->lock);
> > > > > > >>>>>> +
> > > > > > >>>>>> + mod_node_page_state(page_pgdat(page),
> > > > > > >>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> > > > > > >>>>>> + 1 << pool->order);
> > > > > > >>>>> Hui what? What should that be good for?
> > > > > > >>>> This is a carryover from the ION page pool implementation:
> > > > > > >>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cdccccff8edcd4d147a5b08d8cd20cff2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637484888114923580%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=9%2BIBC0tezSV6Ci4S3kWfW%2BQvJm4mdunn3dF6C0kyfCw%3D&reserved=0
> > > > > > >>>>
> > > > > > >>>>
> > > > > > >>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> > > > > > >>>> see the cached pages are shrinkable/freeable. This maybe falls under
> > > > > > >>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> > > > > > >>>> for now, or let the drivers using the shared page pool logic handle
> > > > > > >>>> the accounting themselves?
> > > > > > >> Intentionally separated the discussion for that here.
> > > > > > >>
> > > > > > >> As far as I can see this is just bluntly incorrect.
> > > > > > >>
> > > > > > >> Either the page is reclaimable or it is part of our pool and freeable
> > > > > > >> through the shrinker, but never ever both.
> > > > > > > IIRC the original motivation for counting ION pooled pages as
> > > > > > > reclaimable was to include them into /proc/meminfo's MemAvailable
> > > > > > > calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> > > > > > > non-slab kernel pages" seems like a good place to account for them but
> > > > > > > I might be wrong.
> > > > > >
> > > > > > Yeah, that's what I see here as well. But exactly that is utterly nonsense.
> > > > > >
> > > > > > Those pages are not "free" in the sense that get_free_page could return
> > > > > > them directly.
> > > > >
> > > > > Well on Android that is kinda true, because Android has it's
> > > > > oom-killer (way back it was just a shrinker callback, not sure how it
> > > > > works now), which just shot down all the background apps. So at least
> > > > > some of that (everything used by background apps) is indeed
> > > > > reclaimable on Android.
> > > > >
> > > > > But that doesn't hold on Linux in general, so we can't really do this
> > > > > for common code.
> > > > >
> > > > > Also I had a long meeting with Suren, John and other googles
> > > > > yesterday, and the aim is now to try and support all the Android gpu
> > > > > memory accounting needs with cgroups. That should work, and it will
> > > > > allow Android to handle all the Android-ism in a clean way in upstream
> > > > > code. Or that's at least the plan.
> > > > >
> > > > > I think the only thing we identified that Android still needs on top
> > > > > is the dma-buf sysfs stuff, so that shared buffers (which on Android
> > > > > are always dma-buf, and always stay around as dma-buf fd throughout
> > > > > their lifetime) can be listed/analyzed with full detail.
> > > > >
> > > > > But aside from this the plan for all the per-process or per-heap
> > > > > account, oom-killer integration and everything else is planned to be
> > > > > done with cgroups.
> > > >
> > > > Until cgroups are ready we probably will need to add a sysfs node to
> > > > report the total dmabuf pool size and I think that would cover our
> > > > current accounting need here.
> > > > As I mentioned, not including dmabuf pools into MemAvailable would
> > > > affect that stat and I'm wondering if pools should be considered as
> > > > part of MemAvailable or not. Since MemAvailable includes SReclaimable
> > > > I think it makes sense to include them but maybe there are other
> > > > considerations that I'm missing?
> > >
> > > On Android, yes, on upstream, not so much. Because upstream doesn't have
> > > the android low memory killer cleanup up all the apps, so effectively we
> > > can't reclaim that memory, and we shouldn't report it as such.
> > > -Daniel
> >
> > Hmm. Sorry, I fail to see why Android's low memory killer makes a
> > difference here. In my mind, the pages in the pools are not used but
> > kept there in case heaps need them (maybe that's the part I'm wrong?).
> > These pages can be freed by the shrinker if memory pressure rises. In
> > that sense I think it's very similar to reclaimable slabs which are
> > already accounted as part of MemAvailable. So it seems logical to me
> > to include unused pages in the pools here as well. What am I missing?
>
> Ah yes, those page pool pages we can list. But conceptually (at least
> in the internals) they're shrunk through mm shrinker callbacks, like
> slab cache memory. So not exactly sure where to list that.
>
> Since we have the same pools for gpu allocations on the ttm side and
> John is looking into unifying those, maybe we could add that as a
> patch on top? For some nice consistency across all gpu drivers from
> android to discrete. I think if you, John and Christian from ttm side
> can figure out how these page pools should be reported we'll have
> something that fits? Maybe John can ping you on the other thread with
> the shared pool rfc between ttm and dma-buf heaps (there's so much
> going right now all over I'm a bit lost).

Sounds good. I'll follow up with John to see where this discussion fits better.
Thanks!

>
> Cheers, Daniel
>
> >
> > >
> > > >
> > > > > Android (for now) only needs to account overall gpu
> > > > > memory since none of it is swappable on android drivers anyway, plus
> > > > > no vram, so not much needed.
> > > > >
> > > > > Cheers, Daniel
> > > > >
> > > > > >
> > > > > > Regards,
> > > > > > Christian.
> > > > > >
> > > > > > >
> > > > > > >> In the best case this just messes up the accounting, in the worst case
> > > > > > >> it can cause memory corruption.
> > > > > > >>
> > > > > > >> Christian.
> > > > > >
> > > > >
> > > > >
> > > > > --
> > > > > Daniel Vetter
> > > > > Software Engineer, Intel Corporation
> > > > > http://blog.ffwll.ch
> > >
> > > --
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
>
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

2021-02-10 18:46:07

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

Am 10.02.21 um 17:39 schrieb Suren Baghdasaryan:
> On Wed, Feb 10, 2021 at 5:06 AM Daniel Vetter <[email protected]> wrote:
>> On Tue, Feb 09, 2021 at 12:16:51PM -0800, Suren Baghdasaryan wrote:
>>> On Tue, Feb 9, 2021 at 12:03 PM Daniel Vetter <[email protected]> wrote:
>>>> On Tue, Feb 9, 2021 at 6:46 PM Christian König <[email protected]> wrote:
>>>>>
>>>>>
>>>>> Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
>>>>>> On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
>>>>>>> Am 09.02.21 um 13:11 schrieb Christian König:
>>>>>>>> [SNIP]
>>>>>>>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
>>>>>>>>>>> +{
>>>>>>>>>>> + spin_lock(&pool->lock);
>>>>>>>>>>> + list_add_tail(&page->lru, &pool->items);
>>>>>>>>>>> + pool->count++;
>>>>>>>>>>> + atomic_long_add(1 << pool->order, &total_pages);
>>>>>>>>>>> + spin_unlock(&pool->lock);
>>>>>>>>>>> +
>>>>>>>>>>> + mod_node_page_state(page_pgdat(page),
>>>>>>>>>>> NR_KERNEL_MISC_RECLAIMABLE,
>>>>>>>>>>> + 1 << pool->order);
>>>>>>>>>> Hui what? What should that be good for?
>>>>>>>>> This is a carryover from the ION page pool implementation:
>>>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cbb7155447ee149a49f3a08d8cde2685d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637485719618339413%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=IYsJoAd7SUo12V7tS3CCRqNVm569iy%2FtoXQqm2MdC1g%3D&reserved=0
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> My sense is it helps with the vmstat/meminfo accounting so folks can
>>>>>>>>> see the cached pages are shrinkable/freeable. This maybe falls under
>>>>>>>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
>>>>>>>>> for now, or let the drivers using the shared page pool logic handle
>>>>>>>>> the accounting themselves?
>>>>>>> Intentionally separated the discussion for that here.
>>>>>>>
>>>>>>> As far as I can see this is just bluntly incorrect.
>>>>>>>
>>>>>>> Either the page is reclaimable or it is part of our pool and freeable
>>>>>>> through the shrinker, but never ever both.
>>>>>> IIRC the original motivation for counting ION pooled pages as
>>>>>> reclaimable was to include them into /proc/meminfo's MemAvailable
>>>>>> calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
>>>>>> non-slab kernel pages" seems like a good place to account for them but
>>>>>> I might be wrong.
>>>>> Yeah, that's what I see here as well. But exactly that is utterly nonsense.
>>>>>
>>>>> Those pages are not "free" in the sense that get_free_page could return
>>>>> them directly.
>>>> Well on Android that is kinda true, because Android has it's
>>>> oom-killer (way back it was just a shrinker callback, not sure how it
>>>> works now), which just shot down all the background apps. So at least
>>>> some of that (everything used by background apps) is indeed
>>>> reclaimable on Android.
>>>>
>>>> But that doesn't hold on Linux in general, so we can't really do this
>>>> for common code.
>>>>
>>>> Also I had a long meeting with Suren, John and other googles
>>>> yesterday, and the aim is now to try and support all the Android gpu
>>>> memory accounting needs with cgroups. That should work, and it will
>>>> allow Android to handle all the Android-ism in a clean way in upstream
>>>> code. Or that's at least the plan.
>>>>
>>>> I think the only thing we identified that Android still needs on top
>>>> is the dma-buf sysfs stuff, so that shared buffers (which on Android
>>>> are always dma-buf, and always stay around as dma-buf fd throughout
>>>> their lifetime) can be listed/analyzed with full detail.
>>>>
>>>> But aside from this the plan for all the per-process or per-heap
>>>> account, oom-killer integration and everything else is planned to be
>>>> done with cgroups.
>>> Until cgroups are ready we probably will need to add a sysfs node to
>>> report the total dmabuf pool size and I think that would cover our
>>> current accounting need here.
>>> As I mentioned, not including dmabuf pools into MemAvailable would
>>> affect that stat and I'm wondering if pools should be considered as
>>> part of MemAvailable or not. Since MemAvailable includes SReclaimable
>>> I think it makes sense to include them but maybe there are other
>>> considerations that I'm missing?
>> On Android, yes, on upstream, not so much. Because upstream doesn't have
>> the android low memory killer cleanup up all the apps, so effectively we
>> can't reclaim that memory, and we shouldn't report it as such.
>> -Daniel
> Hmm. Sorry, I fail to see why Android's low memory killer makes a
> difference here. In my mind, the pages in the pools are not used but
> kept there in case heaps need them (maybe that's the part I'm wrong?).
> These pages can be freed by the shrinker if memory pressure rises.

And exactly that's the difference. They *can* be freed is not the same
thing as they *are* free.

> In that sense I think it's very similar to reclaimable slabs which are
> already accounted as part of MemAvailable. So it seems logical to me
> to include unused pages in the pools here as well. What am I missing?

See the shrinkers are there because you need to do some action before
you can re-use the memory.

In the case of the TTM/DRM pool for example you need to change the
caching attributes which might cause sleeping for a TLB flush to finish.

By accounting those pages as free you mess up (for example) the handling
which makes sure that there are enough emergency reserves. I can only
strongly recommend to not do that.

What you could do is to add a sysfs interface to expose the different
shrinkers and the amount of pages in them to userspace. Similar to what
/proc/slabinfo is doing.

Regards,
Christian.

>
>>>> Android (for now) only needs to account overall gpu
>>>> memory since none of it is swappable on android drivers anyway, plus
>>>> no vram, so not much needed.
>>>>
>>>> Cheers, Daniel
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>>> In the best case this just messes up the accounting, in the worst case
>>>>>>> it can cause memory corruption.
>>>>>>>
>>>>>>> Christian.
>>>>
>>>> --
>>>> Daniel Vetter
>>>> Software Engineer, Intel Corporation
>>>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fblog.ffwll.ch%2F&data=04%7C01%7Cchristian.koenig%40amd.com%7Cbb7155447ee149a49f3a08d8cde2685d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637485719618349407%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=11ENl28PIoSoUx6FhkEK9u4G6yiLc3YhsYsl1DIzsv8%3D&reserved=0
>> --
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fblog.ffwll.ch%2F&data=04%7C01%7Cchristian.koenig%40amd.com%7Cbb7155447ee149a49f3a08d8cde2685d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637485719618349407%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=11ENl28PIoSoUx6FhkEK9u4G6yiLc3YhsYsl1DIzsv8%3D&reserved=0

2021-02-10 19:14:45

[permalink] [raw]

Subject: Re: [RFC][PATCH v6 1/7] drm: Add a sharable drm page-pool implementation

On Wed, Feb 10, 2021 at 10:32 AM Christian König
<[email protected]> wrote:
>
>
>
> Am 10.02.21 um 17:39 schrieb Suren Baghdasaryan:
> > On Wed, Feb 10, 2021 at 5:06 AM Daniel Vetter <[email protected]> wrote:
> >> On Tue, Feb 09, 2021 at 12:16:51PM -0800, Suren Baghdasaryan wrote:
> >>> On Tue, Feb 9, 2021 at 12:03 PM Daniel Vetter <[email protected]> wrote:
> >>>> On Tue, Feb 9, 2021 at 6:46 PM Christian König <[email protected]> wrote:
> >>>>>
> >>>>>
> >>>>> Am 09.02.21 um 18:33 schrieb Suren Baghdasaryan:
> >>>>>> On Tue, Feb 9, 2021 at 4:57 AM Christian König <[email protected]> wrote:
> >>>>>>> Am 09.02.21 um 13:11 schrieb Christian König:
> >>>>>>>> [SNIP]
> >>>>>>>>>>> +void drm_page_pool_add(struct drm_page_pool *pool, struct page *page)
> >>>>>>>>>>> +{
> >>>>>>>>>>> + spin_lock(&pool->lock);
> >>>>>>>>>>> + list_add_tail(&page->lru, &pool->items);
> >>>>>>>>>>> + pool->count++;
> >>>>>>>>>>> + atomic_long_add(1 << pool->order, &total_pages);
> >>>>>>>>>>> + spin_unlock(&pool->lock);
> >>>>>>>>>>> +
> >>>>>>>>>>> + mod_node_page_state(page_pgdat(page),
> >>>>>>>>>>> NR_KERNEL_MISC_RECLAIMABLE,
> >>>>>>>>>>> + 1 << pool->order);
> >>>>>>>>>> Hui what? What should that be good for?
> >>>>>>>>> This is a carryover from the ION page pool implementation:
> >>>>>>>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgit.kernel.org%2Fpub%2Fscm%2Flinux%2Fkernel%2Fgit%2Ftorvalds%2Flinux.git%2Ftree%2Fdrivers%2Fstaging%2Fandroid%2Fion%2Fion_page_pool.c%3Fh%3Dv5.10%23n28&data=04%7C01%7Cchristian.koenig%40amd.com%7Cbb7155447ee149a49f3a08d8cde2685d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637485719618339413%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=IYsJoAd7SUo12V7tS3CCRqNVm569iy%2FtoXQqm2MdC1g%3D&reserved=0
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> My sense is it helps with the vmstat/meminfo accounting so folks can
> >>>>>>>>> see the cached pages are shrinkable/freeable. This maybe falls under
> >>>>>>>>> other dmabuf accounting/stats discussions, so I'm happy to remove it
> >>>>>>>>> for now, or let the drivers using the shared page pool logic handle
> >>>>>>>>> the accounting themselves?
> >>>>>>> Intentionally separated the discussion for that here.
> >>>>>>>
> >>>>>>> As far as I can see this is just bluntly incorrect.
> >>>>>>>
> >>>>>>> Either the page is reclaimable or it is part of our pool and freeable
> >>>>>>> through the shrinker, but never ever both.
> >>>>>> IIRC the original motivation for counting ION pooled pages as
> >>>>>> reclaimable was to include them into /proc/meminfo's MemAvailable
> >>>>>> calculations. NR_KERNEL_MISC_RECLAIMABLE defined as "reclaimable
> >>>>>> non-slab kernel pages" seems like a good place to account for them but
> >>>>>> I might be wrong.
> >>>>> Yeah, that's what I see here as well. But exactly that is utterly nonsense.
> >>>>>
> >>>>> Those pages are not "free" in the sense that get_free_page could return
> >>>>> them directly.
> >>>> Well on Android that is kinda true, because Android has it's
> >>>> oom-killer (way back it was just a shrinker callback, not sure how it
> >>>> works now), which just shot down all the background apps. So at least
> >>>> some of that (everything used by background apps) is indeed
> >>>> reclaimable on Android.
> >>>>
> >>>> But that doesn't hold on Linux in general, so we can't really do this
> >>>> for common code.
> >>>>
> >>>> Also I had a long meeting with Suren, John and other googles
> >>>> yesterday, and the aim is now to try and support all the Android gpu
> >>>> memory accounting needs with cgroups. That should work, and it will
> >>>> allow Android to handle all the Android-ism in a clean way in upstream
> >>>> code. Or that's at least the plan.
> >>>>
> >>>> I think the only thing we identified that Android still needs on top
> >>>> is the dma-buf sysfs stuff, so that shared buffers (which on Android
> >>>> are always dma-buf, and always stay around as dma-buf fd throughout
> >>>> their lifetime) can be listed/analyzed with full detail.
> >>>>
> >>>> But aside from this the plan for all the per-process or per-heap
> >>>> account, oom-killer integration and everything else is planned to be
> >>>> done with cgroups.
> >>> Until cgroups are ready we probably will need to add a sysfs node to
> >>> report the total dmabuf pool size and I think that would cover our
> >>> current accounting need here.
> >>> As I mentioned, not including dmabuf pools into MemAvailable would
> >>> affect that stat and I'm wondering if pools should be considered as
> >>> part of MemAvailable or not. Since MemAvailable includes SReclaimable
> >>> I think it makes sense to include them but maybe there are other
> >>> considerations that I'm missing?
> >> On Android, yes, on upstream, not so much. Because upstream doesn't have
> >> the android low memory killer cleanup up all the apps, so effectively we
> >> can't reclaim that memory, and we shouldn't report it as such.
> >> -Daniel
> > Hmm. Sorry, I fail to see why Android's low memory killer makes a
> > difference here. In my mind, the pages in the pools are not used but
> > kept there in case heaps need them (maybe that's the part I'm wrong?).
> > These pages can be freed by the shrinker if memory pressure rises.
>
> And exactly that's the difference. They *can* be freed is not the same
> thing as they *are* free.

No argument there. That's why I think meminfo has two separate stats
for MemFree and MemAvailable. MemFree is self-explanatory. The
description of MemAvailable in
https://www.kernel.org/doc/Documentation/filesystems/proc.txt says "An
estimate of how much memory is available for starting new
applications, without swapping.". Since dropping unused pages from
slabs, caches and pools is less expensive than swapping, I would
assume that a well-behaved system would do that before resorting to
swapping. And if so, such memory should be included in MemAvailable
because VM will make it available before swapping. But again, that's
my interpretation. WDYT?

>
> > In that sense I think it's very similar to reclaimable slabs which are
> > already accounted as part of MemAvailable. So it seems logical to me
> > to include unused pages in the pools here as well. What am I missing?
>
> See the shrinkers are there because you need to do some action before
> you can re-use the memory.
>
> In the case of the TTM/DRM pool for example you need to change the
> caching attributes which might cause sleeping for a TLB flush to finish.

I see your point here. But how about caches/pools which can be easily
dropped? Shouldn't they be part of MemAvailable?

>
> By accounting those pages as free you mess up (for example) the handling
> which makes sure that there are enough emergency reserves. I can only
> strongly recommend to not do that.
>
> What you could do is to add a sysfs interface to expose the different
> shrinkers and the amount of pages in them to userspace. Similar to what
> /proc/slabinfo is doing.

True, we can work around this. Just want to make sure whatever we do
really makes sense.
Thanks,
Suren.

>
> Regards,
> Christian.
>
> >
> >>>> Android (for now) only needs to account overall gpu
> >>>> memory since none of it is swappable on android drivers anyway, plus
> >>>> no vram, so not much needed.
> >>>>
> >>>> Cheers, Daniel
> >>>>
> >>>>> Regards,
> >>>>> Christian.
> >>>>>
> >>>>>>> In the best case this just messes up the accounting, in the worst case
> >>>>>>> it can cause memory corruption.
> >>>>>>>
> >>>>>>> Christian.
> >>>>
> >>>> --
> >>>> Daniel Vetter
> >>>> Software Engineer, Intel Corporation
> >>>> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fblog.ffwll.ch%2F&data=04%7C01%7Cchristian.koenig%40amd.com%7Cbb7155447ee149a49f3a08d8cde2685d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637485719618349407%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=11ENl28PIoSoUx6FhkEK9u4G6yiLc3YhsYsl1DIzsv8%3D&reserved=0
> >> --
> >> Daniel Vetter
> >> Software Engineer, Intel Corporation
> >> https://nam11.safelinks.protection.outlook.com/?url=http%3A%2F%2Fblog.ffwll.ch%2F&data=04%7C01%7Cchristian.koenig%40amd.com%7Cbb7155447ee149a49f3a08d8cde2685d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637485719618349407%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=11ENl28PIoSoUx6FhkEK9u4G6yiLc3YhsYsl1DIzsv8%3D&reserved=0
>

2021-02-10 19:27:32