2023-09-05 16:24:19

by Dmitry Osipenko

[permalink] [raw]
Subject: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

Introduce common drm-shmem shrinker for DRM drivers.

To start using drm-shmem shrinker drivers should do the following:

1. Implement evict() callback of GEM object where driver should check
whether object is purgeable or evictable using drm-shmem helpers and
perform the shrinking action

2. Initialize drm-shmem internals using drmm_gem_shmem_init(drm_device),
which will register drm-shmem shrinker

3. Implement madvise IOCTL that will use drm_gem_shmem_madvise()

Signed-off-by: Daniel Almeida <[email protected]>
Signed-off-by: Dmitry Osipenko <[email protected]>
---
drivers/gpu/drm/drm_gem_shmem_helper.c | 442 ++++++++++++++++--
.../gpu/drm/panfrost/panfrost_gem_shrinker.c | 9 +-
include/drm/drm_device.h | 10 +-
include/drm/drm_gem_shmem_helper.h | 71 ++-
4 files changed, 494 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 4633a418faba..a0a6c7e505c8 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -20,6 +20,7 @@
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
#include <drm/drm_prime.h>
#include <drm/drm_print.h>

@@ -88,8 +89,6 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private)
if (ret)
goto err_release;

- INIT_LIST_HEAD(&shmem->madv_list);
-
if (!private) {
/*
* Our buffers are kept pinned, so allocating them
@@ -128,6 +127,62 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_create);

+static bool drm_gem_shmem_is_evictable(struct drm_gem_shmem_object *shmem)
+{
+ dma_resv_assert_held(shmem->base.resv);
+
+ return (shmem->madv >= 0) && shmem->base.funcs->evict &&
+ refcount_read(&shmem->pages_use_count) &&
+ !refcount_read(&shmem->pages_pin_count) &&
+ !shmem->base.dma_buf && !shmem->base.import_attach &&
+ shmem->sgt && !shmem->evicted;
+}
+
+static void
+drm_gem_shmem_update_pages_state_locked(struct drm_gem_shmem_object *shmem)
+{
+ struct drm_gem_object *obj = &shmem->base;
+ struct drm_gem_shmem *shmem_mm = obj->dev->shmem_mm;
+ struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker;
+
+ dma_resv_assert_held(shmem->base.resv);
+
+ if (!shmem_shrinker || obj->import_attach)
+ return;
+
+ if (shmem->madv < 0)
+ drm_gem_lru_remove(&shmem->base);
+ else if (drm_gem_shmem_is_evictable(shmem) || drm_gem_shmem_is_purgeable(shmem))
+ drm_gem_lru_move_tail(&shmem_shrinker->lru_evictable, &shmem->base);
+ else if (shmem->evicted)
+ drm_gem_lru_move_tail(&shmem_shrinker->lru_evicted, &shmem->base);
+ else if (!shmem->pages)
+ drm_gem_lru_remove(&shmem->base);
+ else
+ drm_gem_lru_move_tail(&shmem_shrinker->lru_pinned, &shmem->base);
+}
+
+static void
+drm_gem_shmem_do_release_pages_locked(struct drm_gem_shmem_object *shmem)
+{
+ struct drm_gem_object *obj = &shmem->base;
+
+ if (!shmem->pages) {
+ drm_WARN_ON(obj->dev, !shmem->evicted && shmem->madv >= 0);
+ return;
+ }
+
+#ifdef CONFIG_X86
+ if (shmem->map_wc)
+ set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT);
+#endif
+
+ drm_gem_put_pages(obj, shmem->pages,
+ shmem->pages_mark_dirty_on_put,
+ shmem->pages_mark_accessed_on_put);
+ shmem->pages = NULL;
+}
+
/**
* drm_gem_shmem_free - Free resources associated with a shmem GEM object
* @shmem: shmem GEM object to free
@@ -142,8 +197,6 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
if (obj->import_attach) {
drm_prime_gem_destroy(obj, shmem->sgt);
} else if (!shmem->imported_sgt) {
- dma_resv_lock(shmem->base.resv, NULL);
-
drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count));

if (shmem->sgt) {
@@ -152,14 +205,27 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
sg_free_table(shmem->sgt);
kfree(shmem->sgt);
}
- if (refcount_read(&shmem->pages_use_count)) {
- drm_gem_shmem_put_pages_locked(shmem);
- drm_WARN_ON(obj->dev, !shmem->got_pages_sgt);
+
+ /*
+ * Destroying the object is a special case.. drm_gem_shmem_free()
+ * calls many things that WARN_ON if the obj lock is not held. But
+ * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
+ * cause a locking order inversion between reservation_ww_class_mutex
+ * and fs_reclaim.
+ *
+ * This deadlock is not actually possible, because no one should
+ * be already holding the lock when drm_gem_shmem_free() is called.
+ * Unfortunately lockdep is not aware of this detail. So when the
+ * refcount drops to zero, don't touch the reservation lock.
+ */
+ if (shmem->got_pages_sgt &&
+ refcount_dec_and_test(&shmem->pages_use_count)) {
+ drm_gem_shmem_do_release_pages_locked(shmem);
+ shmem->got_pages_sgt = false;
}

drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count));
-
- dma_resv_unlock(shmem->base.resv);
+ drm_WARN_ON(obj->dev, shmem->got_pages_sgt);
}

drm_gem_object_release(obj);
@@ -167,15 +233,26 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem)
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_free);

-static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)
+static int
+drm_gem_shmem_acquire_pages(struct drm_gem_shmem_object *shmem, bool init)
{
struct drm_gem_object *obj = &shmem->base;
struct page **pages;

dma_resv_assert_held(shmem->base.resv);

- if (refcount_inc_not_zero(&shmem->pages_use_count))
+ if (shmem->madv < 0) {
+ drm_WARN_ON(obj->dev, shmem->pages);
+ return -ENOMEM;
+ }
+
+ if (shmem->pages) {
+ drm_WARN_ON(obj->dev, !shmem->evicted);
return 0;
+ }
+
+ if (drm_WARN_ON(obj->dev, !(init ^ refcount_read(&shmem->pages_use_count))))
+ return -EINVAL;

pages = drm_gem_get_pages(obj);
if (IS_ERR(pages)) {
@@ -196,8 +273,36 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)

shmem->pages = pages;

+ return 0;
+}
+
+static void
+drm_gem_shmem_release_pages_locked(struct drm_gem_shmem_object *shmem)
+{
+ dma_resv_assert_held(shmem->base.resv);
+ drm_gem_shmem_do_release_pages_locked(shmem);
+}
+
+static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)
+{
+ int err;
+
+ dma_resv_assert_held(shmem->base.resv);
+
+ if (shmem->madv < 0)
+ return -ENOMEM;
+
+ if (refcount_inc_not_zero(&shmem->pages_use_count))
+ return 0;
+
+ err = drm_gem_shmem_acquire_pages(shmem, true);
+ if (err)
+ return err;
+
refcount_set(&shmem->pages_use_count, 1);

+ drm_gem_shmem_update_pages_state_locked(shmem);
+
return 0;
}

@@ -209,20 +314,11 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem)
*/
void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem)
{
- struct drm_gem_object *obj = &shmem->base;
-
dma_resv_assert_held(shmem->base.resv);

if (refcount_dec_and_test(&shmem->pages_use_count)) {
-#ifdef CONFIG_X86
- if (shmem->map_wc)
- set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT);
-#endif
-
- drm_gem_put_pages(obj, shmem->pages,
- shmem->pages_mark_dirty_on_put,
- shmem->pages_mark_accessed_on_put);
- shmem->pages = NULL;
+ drm_gem_shmem_release_pages_locked(shmem);
+ drm_gem_shmem_update_pages_state_locked(shmem);
}
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_put_pages_locked);
@@ -251,8 +347,15 @@ static int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem)
return 0;

ret = drm_gem_shmem_get_pages_locked(shmem);
- if (!ret)
+ if (!ret) {
+ ret = drm_gem_shmem_swapin_locked(shmem);
+ if (ret) {
+ drm_gem_shmem_put_pages_locked(shmem);
+ return ret;
+ }
+
refcount_set(&shmem->pages_pin_count, 1);
+ }

return ret;
}
@@ -448,29 +551,54 @@ int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv)

madv = shmem->madv;

+ drm_gem_shmem_update_pages_state_locked(shmem);
+
return (madv >= 0);
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_madvise_locked);

-void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem)
+int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv)
+{
+ struct drm_gem_object *obj = &shmem->base;
+ int ret;
+
+ ret = dma_resv_lock_interruptible(obj->resv, NULL);
+ if (ret)
+ return ret;
+
+ ret = drm_gem_shmem_madvise_locked(shmem, madv);
+ dma_resv_unlock(obj->resv);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_madvise);
+
+static void drm_gem_shmem_unpin_pages_locked(struct drm_gem_shmem_object *shmem)
{
struct drm_gem_object *obj = &shmem->base;
struct drm_device *dev = obj->dev;

dma_resv_assert_held(shmem->base.resv);

- drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem));
+ if (shmem->evicted)
+ return;

dma_unmap_sgtable(dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0);
+ drm_gem_shmem_release_pages_locked(shmem);
+ drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+
sg_free_table(shmem->sgt);
kfree(shmem->sgt);
shmem->sgt = NULL;
+}

- drm_gem_shmem_put_pages_locked(shmem);
+void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem)
+{
+ struct drm_gem_object *obj = &shmem->base;

- shmem->madv = -1;
+ drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem));

- drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+ drm_gem_shmem_unpin_pages_locked(shmem);
drm_gem_free_mmap_offset(obj);

/* Our goal here is to return as much of the memory as
@@ -481,9 +609,59 @@ void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem)
shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1);

invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1);
+
+ shmem->madv = -1;
+ shmem->evicted = false;
+ drm_gem_shmem_update_pages_state_locked(shmem);
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_purge_locked);

+/**
+ * drm_gem_shmem_swapin_locked() - Moves shmem GEM back to memory and enables
+ * hardware access to the memory.
+ * @shmem: shmem GEM object
+ *
+ * This function moves shmem GEM back to memory if it was previously evicted
+ * by the memory shrinker. The GEM is ready to use on success.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+int drm_gem_shmem_swapin_locked(struct drm_gem_shmem_object *shmem)
+{
+ struct drm_gem_object *obj = &shmem->base;
+ struct sg_table *sgt;
+ int err;
+
+ dma_resv_assert_held(shmem->base.resv);
+
+ if (shmem->evicted) {
+ err = drm_gem_shmem_acquire_pages(shmem, false);
+ if (err)
+ return err;
+
+ sgt = drm_gem_shmem_get_sg_table(shmem);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ err = dma_map_sgtable(obj->dev->dev, sgt,
+ DMA_BIDIRECTIONAL, 0);
+ if (err) {
+ sg_free_table(sgt);
+ kfree(sgt);
+ return err;
+ }
+
+ shmem->sgt = sgt;
+ shmem->evicted = false;
+
+ drm_gem_shmem_update_pages_state_locked(shmem);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_swapin_locked);
+
/**
* drm_gem_shmem_dumb_create - Create a dumb shmem buffer object
* @file: DRM file structure to create the dumb buffer for
@@ -530,22 +708,34 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
vm_fault_t ret;
struct page *page;
pgoff_t page_offset;
+ bool pages_unpinned;
+ int err;

/* We don't use vmf->pgoff since that has the fake offset */
page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;

dma_resv_lock(shmem->base.resv, NULL);

- if (page_offset >= num_pages ||
- drm_WARN_ON_ONCE(obj->dev, !shmem->pages) ||
- shmem->madv < 0) {
+ /* Sanity-check that we have the pages pointer when it should present */
+ pages_unpinned = (shmem->evicted || shmem->madv < 0 ||
+ !refcount_read(&shmem->pages_use_count));
+ drm_WARN_ON_ONCE(obj->dev, !shmem->pages ^ pages_unpinned);
+
+ if (page_offset >= num_pages || (!shmem->pages && !shmem->evicted)) {
ret = VM_FAULT_SIGBUS;
} else {
+ err = drm_gem_shmem_swapin_locked(shmem);
+ if (err) {
+ ret = VM_FAULT_OOM;
+ goto unlock;
+ }
+
page = shmem->pages[page_offset];

ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
}

+unlock:
dma_resv_unlock(shmem->base.resv);

return ret;
@@ -568,6 +758,7 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
drm_WARN_ON_ONCE(obj->dev,
!refcount_inc_not_zero(&shmem->pages_use_count));

+ drm_gem_shmem_update_pages_state_locked(shmem);
dma_resv_unlock(shmem->base.resv);

drm_gem_vm_open(vma);
@@ -653,7 +844,9 @@ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem,
drm_printf_indent(p, indent, "pages_pin_count=%u\n", refcount_read(&shmem->pages_pin_count));
drm_printf_indent(p, indent, "pages_use_count=%u\n", refcount_read(&shmem->pages_use_count));
drm_printf_indent(p, indent, "vmap_use_count=%u\n", refcount_read(&shmem->vmap_use_count));
+ drm_printf_indent(p, indent, "evicted=%d\n", shmem->evicted);
drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr);
+ drm_printf_indent(p, indent, "madv=%d\n", shmem->madv);
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_print_info);

@@ -715,6 +908,8 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_

shmem->sgt = sgt;

+ drm_gem_shmem_update_pages_state_locked(shmem);
+
return sgt;

err_free_sgt:
@@ -802,6 +997,191 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev,
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table);

+static struct drm_gem_shmem_shrinker *
+to_drm_gem_shmem_shrinker(struct shrinker *shrinker)
+{
+ return container_of(shrinker, struct drm_gem_shmem_shrinker, base);
+}
+
+static unsigned long
+drm_gem_shmem_shrinker_count_objects(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ struct drm_gem_shmem_shrinker *shmem_shrinker =
+ to_drm_gem_shmem_shrinker(shrinker);
+ unsigned long count = shmem_shrinker->lru_evictable.count;
+
+ if (count >= SHRINK_EMPTY)
+ return SHRINK_EMPTY - 1;
+
+ return count ?: SHRINK_EMPTY;
+}
+
+void drm_gem_shmem_evict_locked(struct drm_gem_shmem_object *shmem)
+{
+ struct drm_gem_object *obj = &shmem->base;
+
+ drm_WARN_ON(obj->dev, !drm_gem_shmem_is_evictable(shmem));
+ drm_WARN_ON(obj->dev, shmem->evicted);
+
+ drm_gem_shmem_unpin_pages_locked(shmem);
+
+ shmem->evicted = true;
+ drm_gem_shmem_update_pages_state_locked(shmem);
+}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_evict_locked);
+
+static bool drm_gem_shmem_shrinker_evict_locked(struct drm_gem_object *obj)
+{
+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+ int err;
+
+ if (!drm_gem_shmem_is_evictable(shmem) ||
+ get_nr_swap_pages() < obj->size >> PAGE_SHIFT)
+ return false;
+
+ err = drm_gem_evict_locked(obj);
+ if (err)
+ return false;
+
+ return true;
+}
+
+static bool drm_gem_shmem_shrinker_purge_locked(struct drm_gem_object *obj)
+{
+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+ int err;
+
+ if (!drm_gem_shmem_is_purgeable(shmem))
+ return false;
+
+ err = drm_gem_evict_locked(obj);
+ if (err)
+ return false;
+
+ return true;
+}
+
+static unsigned long
+drm_gem_shmem_shrinker_scan_objects(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ struct drm_gem_shmem_shrinker *shmem_shrinker;
+ unsigned long nr_to_scan = sc->nr_to_scan;
+ unsigned long remaining = 0;
+ unsigned long freed = 0;
+
+ shmem_shrinker = to_drm_gem_shmem_shrinker(shrinker);
+
+ /* purge as many objects as we can */
+ freed += drm_gem_lru_scan(&shmem_shrinker->lru_evictable,
+ nr_to_scan, &remaining,
+ drm_gem_shmem_shrinker_purge_locked);
+
+ /* evict as many objects as we can */
+ if (freed < nr_to_scan)
+ freed += drm_gem_lru_scan(&shmem_shrinker->lru_evictable,
+ nr_to_scan - freed, &remaining,
+ drm_gem_shmem_shrinker_evict_locked);
+
+ return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP;
+}
+
+static int drm_gem_shmem_shrinker_init(struct drm_gem_shmem *shmem_mm,
+ const char *shrinker_name)
+{
+ struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker;
+ int err;
+
+ shmem_shrinker->base.count_objects = drm_gem_shmem_shrinker_count_objects;
+ shmem_shrinker->base.scan_objects = drm_gem_shmem_shrinker_scan_objects;
+ shmem_shrinker->base.seeks = DEFAULT_SEEKS;
+
+ mutex_init(&shmem_shrinker->lock);
+ drm_gem_lru_init(&shmem_shrinker->lru_evictable, &shmem_shrinker->lock);
+ drm_gem_lru_init(&shmem_shrinker->lru_evicted, &shmem_shrinker->lock);
+ drm_gem_lru_init(&shmem_shrinker->lru_pinned, &shmem_shrinker->lock);
+
+ err = register_shrinker(&shmem_shrinker->base, shrinker_name);
+ if (err) {
+ mutex_destroy(&shmem_shrinker->lock);
+ return err;
+ }
+
+ return 0;
+}
+
+static void drm_gem_shmem_shrinker_release(struct drm_device *dev,
+ struct drm_gem_shmem *shmem_mm)
+{
+ struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker;
+
+ unregister_shrinker(&shmem_shrinker->base);
+ drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_evictable.list));
+ drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_evicted.list));
+ drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_pinned.list));
+ mutex_destroy(&shmem_shrinker->lock);
+}
+
+static int drm_gem_shmem_init(struct drm_device *dev)
+{
+ int err;
+
+ if (drm_WARN_ON(dev, dev->shmem_mm))
+ return -EBUSY;
+
+ dev->shmem_mm = kzalloc(sizeof(*dev->shmem_mm), GFP_KERNEL);
+ if (!dev->shmem_mm)
+ return -ENOMEM;
+
+ err = drm_gem_shmem_shrinker_init(dev->shmem_mm, dev->unique);
+ if (err)
+ goto free_gem_shmem;
+
+ return 0;
+
+free_gem_shmem:
+ kfree(dev->shmem_mm);
+ dev->shmem_mm = NULL;
+
+ return err;
+}
+
+static void drm_gem_shmem_release(struct drm_device *dev, void *ptr)
+{
+ struct drm_gem_shmem *shmem_mm = dev->shmem_mm;
+
+ drm_gem_shmem_shrinker_release(dev, shmem_mm);
+ dev->shmem_mm = NULL;
+ kfree(shmem_mm);
+}
+
+/**
+ * drmm_gem_shmem_init() - Initialize drm-shmem internals
+ * @dev: DRM device
+ *
+ * Cleanup is automatically managed as part of DRM device releasing.
+ * Calling this function multiple times will result in a error.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+int drmm_gem_shmem_init(struct drm_device *dev)
+{
+ int err;
+
+ err = drm_gem_shmem_init(dev);
+ if (err)
+ return err;
+
+ err = drmm_add_action_or_reset(dev, drm_gem_shmem_release, NULL);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(drmm_gem_shmem_init);
+
MODULE_DESCRIPTION("DRM SHMEM memory-management helpers");
MODULE_IMPORT_NS(DMA_BUF);
MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
index 72193bd734e1..1aa94fff7072 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
@@ -15,6 +15,13 @@
#include "panfrost_gem.h"
#include "panfrost_mmu.h"

+static bool panfrost_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem)
+{
+ return (shmem->madv > 0) &&
+ !refcount_read(&shmem->pages_pin_count) && shmem->sgt &&
+ !shmem->base.dma_buf && !shmem->base.import_attach;
+}
+
static unsigned long
panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
@@ -27,7 +34,7 @@ panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc
return 0;

list_for_each_entry(shmem, &pfdev->shrinker_list, madv_list) {
- if (drm_gem_shmem_is_purgeable(shmem))
+ if (panfrost_gem_shmem_is_purgeable(shmem))
count += shmem->base.size >> PAGE_SHIFT;
}

diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 7cf4afae2e79..a978f0cb5e84 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -16,6 +16,7 @@ struct drm_vblank_crtc;
struct drm_vma_offset_manager;
struct drm_vram_mm;
struct drm_fb_helper;
+struct drm_gem_shmem_shrinker;

struct inode;

@@ -290,8 +291,13 @@ struct drm_device {
/** @vma_offset_manager: GEM information */
struct drm_vma_offset_manager *vma_offset_manager;

- /** @vram_mm: VRAM MM memory manager */
- struct drm_vram_mm *vram_mm;
+ union {
+ /** @vram_mm: VRAM MM memory manager */
+ struct drm_vram_mm *vram_mm;
+
+ /** @shmem_mm: SHMEM GEM memory manager */
+ struct drm_gem_shmem *shmem_mm;
+ };

/**
* @switch_power_state:
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 63e91e8f2d5c..65c99da25048 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -6,6 +6,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/shrinker.h>

#include <drm/drm_file.h>
#include <drm/drm_gem.h>
@@ -13,6 +14,7 @@
#include <drm/drm_prime.h>

struct dma_buf_attachment;
+struct drm_device;
struct drm_mode_create_dumb;
struct drm_printer;
struct sg_table;
@@ -53,8 +55,8 @@ struct drm_gem_shmem_object {
* @madv: State for madvise
*
* 0 is active/inuse.
+ * 1 is not-needed/can-be-purged
* A negative value is the object is purged.
- * Positive values are driver specific and not used by the helpers.
*/
int madv;

@@ -115,6 +117,12 @@ struct drm_gem_shmem_object {
* @map_wc: map object write-combined (instead of using shmem defaults).
*/
bool map_wc : 1;
+
+ /**
+ * @evicted: True if shmem pages are evicted by the memory shrinker.
+ * Used internally by memory shrinker.
+ */
+ bool evicted : 1;
};

#define to_drm_gem_shmem_obj(obj) \
@@ -133,14 +141,22 @@ void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma);

int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv);
+int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv);

static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem)
{
- return (shmem->madv > 0) &&
- !refcount_read(&shmem->pages_pin_count) && shmem->sgt &&
- !shmem->base.dma_buf && !shmem->base.import_attach;
+ dma_resv_assert_held(shmem->base.resv);
+
+ return (shmem->madv > 0) && shmem->base.funcs->evict &&
+ refcount_read(&shmem->pages_use_count) &&
+ !refcount_read(&shmem->pages_pin_count) &&
+ !shmem->base.dma_buf && !shmem->base.import_attach &&
+ (shmem->sgt || shmem->evicted);
}

+int drm_gem_shmem_swapin_locked(struct drm_gem_shmem_object *shmem);
+
+void drm_gem_shmem_evict_locked(struct drm_gem_shmem_object *shmem);
void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem);

struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem);
@@ -284,6 +300,53 @@ static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct v
return drm_gem_shmem_mmap(shmem, vma);
}

+/**
+ * drm_gem_shmem_object_madvise - unlocked GEM object function for drm_gem_shmem_madvise_locked()
+ * @obj: GEM object
+ * @madv: Madvise value
+ *
+ * This function wraps drm_gem_shmem_madvise_locked(), providing unlocked variant.
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ */
+static inline int drm_gem_shmem_object_madvise(struct drm_gem_object *obj, int madv)
+{
+ struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
+
+ return drm_gem_shmem_madvise(shmem, madv);
+}
+
+/**
+ * struct drm_gem_shmem_shrinker - Memory shrinker of GEM shmem memory manager
+ */
+struct drm_gem_shmem_shrinker {
+ /** @base: Shrinker for purging shmem GEM objects */
+ struct shrinker base;
+
+ /** @lock: Protects @lru_* */
+ struct mutex lock;
+
+ /** @lru_pinned: List of pinned shmem GEM objects */
+ struct drm_gem_lru lru_pinned;
+
+ /** @lru_evictable: List of shmem GEM objects to be evicted */
+ struct drm_gem_lru lru_evictable;
+
+ /** @lru_evicted: List of evicted shmem GEM objects */
+ struct drm_gem_lru lru_evicted;
+};
+
+/**
+ * struct drm_gem_shmem - GEM shmem memory manager
+ */
+struct drm_gem_shmem {
+ /** @shrinker: GEM shmem shrinker */
+ struct drm_gem_shmem_shrinker shrinker;
+};
+
+int drmm_gem_shmem_init(struct drm_device *dev);
+
/*
* Driver ops
*/
--
2.41.0


2023-09-12 20:35:25

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On 9/7/23 13:03, Dan Carpenter wrote:
> 2c607edf57db6a Dmitry Osipenko 2023-09-03 @724 if (page_offset >= num_pages || (!shmem->pages && !shmem->evicted)) {
> ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> Should this be || instead of &&? (The other thing that people do is
> add "!shmem->evicted" for readability even though it doesn't need to be
> checked. So maybe that's the issue, but the checker assumes it needs to
> be checked).
>
> d611b4a0907cec Neil Roberts 2021-02-23 725 ret = VM_FAULT_SIGBUS;
> d611b4a0907cec Neil Roberts 2021-02-23 726 } else {
> 2c607edf57db6a Dmitry Osipenko 2023-09-03 727 err = drm_gem_shmem_swapin_locked(shmem);
>
> Or maybe it's because the kbuild bot can't use the cross function db
> and shmem->pages is assigned here?

Should be a function db problem. The shmem->pages won't be NULL if
drm_gem_shmem_swapin_locked() succeeds.

--
Best regards,
Dmitry

2023-09-13 03:35:18

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On 9/5/23 11:03, Boris Brezillon wrote:
>> * But
>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
>> + * cause a locking order inversion between reservation_ww_class_mutex
>> + * and fs_reclaim.
>> + *
>> + * This deadlock is not actually possible, because no one should
>> + * be already holding the lock when drm_gem_shmem_free() is called.
>> + * Unfortunately lockdep is not aware of this detail. So when the
>> + * refcount drops to zero, don't touch the reservation lock.
>> + */
>> + if (shmem->got_pages_sgt &&
>> + refcount_dec_and_test(&shmem->pages_use_count)) {
>> + drm_gem_shmem_do_release_pages_locked(shmem);
>> + shmem->got_pages_sgt = false;
>> }
> Leaking memory is the right thing to do if pages_use_count > 1 (it's
> better to leak than having someone access memory it no longer owns), but
> I think it's worth mentioning in the above comment.

It's unlikely that it will be only a leak without a following up
use-after-free. Neither is acceptable.

The drm_gem_shmem_free() could be changed such that kernel won't blow up
on a refcnt bug, but that's not worthwhile doing because drivers
shouldn't have silly bugs.

--
Best regards,
Dmitry

2023-09-14 01:54:29

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On Wed, 13 Sep 2023 03:56:14 +0300
Dmitry Osipenko <[email protected]> wrote:

> On 9/5/23 11:03, Boris Brezillon wrote:
> >> * But
> >> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
> >> + * cause a locking order inversion between reservation_ww_class_mutex
> >> + * and fs_reclaim.
> >> + *
> >> + * This deadlock is not actually possible, because no one should
> >> + * be already holding the lock when drm_gem_shmem_free() is called.
> >> + * Unfortunately lockdep is not aware of this detail. So when the
> >> + * refcount drops to zero, don't touch the reservation lock.
> >> + */
> >> + if (shmem->got_pages_sgt &&
> >> + refcount_dec_and_test(&shmem->pages_use_count)) {
> >> + drm_gem_shmem_do_release_pages_locked(shmem);
> >> + shmem->got_pages_sgt = false;
> >> }
> > Leaking memory is the right thing to do if pages_use_count > 1 (it's
> > better to leak than having someone access memory it no longer owns), but
> > I think it's worth mentioning in the above comment.
>
> It's unlikely that it will be only a leak without a following up
> use-after-free. Neither is acceptable.

Not necessarily, if you have a page leak, it could be that the GPU has
access to those pages, but doesn't need the GEM object anymore
(pages are mapped by the iommu, which doesn't need shmem->sgt or
shmem->pages after the mapping is created). Without a WARN_ON(), this
can go unnoticed and lead to memory corruptions/information leaks.

>
> The drm_gem_shmem_free() could be changed such that kernel won't blow up
> on a refcnt bug, but that's not worthwhile doing because drivers
> shouldn't have silly bugs.

We definitely don't want to fix that, but we want to complain loudly
(WARN_ON()), and make sure the risk is limited (preventing memory from
being re-assigned to someone else by not freeing it).

2023-09-14 08:58:59

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On 9/14/23 10:36, Boris Brezillon wrote:
> On Thu, 14 Sep 2023 07:02:52 +0300
> Dmitry Osipenko <[email protected]> wrote:
>
>> On 9/13/23 10:48, Boris Brezillon wrote:
>>> On Wed, 13 Sep 2023 03:56:14 +0300
>>> Dmitry Osipenko <[email protected]> wrote:
>>>
>>>> On 9/5/23 11:03, Boris Brezillon wrote:
>>>>>> * But
>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex
>>>>>> + * and fs_reclaim.
>>>>>> + *
>>>>>> + * This deadlock is not actually possible, because no one should
>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called.
>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the
>>>>>> + * refcount drops to zero, don't touch the reservation lock.
>>>>>> + */
>>>>>> + if (shmem->got_pages_sgt &&
>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem);
>>>>>> + shmem->got_pages_sgt = false;
>>>>>> }
>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
>>>>> better to leak than having someone access memory it no longer owns), but
>>>>> I think it's worth mentioning in the above comment.
>>>>
>>>> It's unlikely that it will be only a leak without a following up
>>>> use-after-free. Neither is acceptable.
>>>
>>> Not necessarily, if you have a page leak, it could be that the GPU has
>>> access to those pages, but doesn't need the GEM object anymore
>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or
>>> shmem->pages after the mapping is created). Without a WARN_ON(), this
>>> can go unnoticed and lead to memory corruptions/information leaks.
>>>
>>>>
>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up
>>>> on a refcnt bug, but that's not worthwhile doing because drivers
>>>> shouldn't have silly bugs.
>>>
>>> We definitely don't want to fix that, but we want to complain loudly
>>> (WARN_ON()), and make sure the risk is limited (preventing memory from
>>> being re-assigned to someone else by not freeing it).
>>
>> That's what the code did and continues to do here. Not exactly sure what
>> you're trying to say. I'm going to relocate the comment in v17 to
>> put_pages(), we can continue discussing it there if I'm missing yours point.
>>
>
> I'm just saying it would be worth mentioning that we're intentionally
> leaking memory if shmem->pages_use_count > 1. Something like:
>
> /**
> * shmem->pages_use_count should be 1 when ->sgt != NULL and
> * zero otherwise. If some users still hold a pages reference
> * that's a bug, and we intentionally leak the pages so they
> * can't be re-allocated to someone else while the GPU/CPU
> * still have access to it.
> */
> drm_WARN_ON(drm,
> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0));
> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count))
> drm_gem_shmem_free_pages(shmem);

That may be acceptable, but only once there will a driver using this
feature.

--
Best regards,
Dmitry

2023-09-14 09:27:56

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On Thu, 14 Sep 2023 10:50:32 +0300
Dmitry Osipenko <[email protected]> wrote:

> On 9/14/23 10:36, Boris Brezillon wrote:
> > On Thu, 14 Sep 2023 07:02:52 +0300
> > Dmitry Osipenko <[email protected]> wrote:
> >
> >> On 9/13/23 10:48, Boris Brezillon wrote:
> >>> On Wed, 13 Sep 2023 03:56:14 +0300
> >>> Dmitry Osipenko <[email protected]> wrote:
> >>>
> >>>> On 9/5/23 11:03, Boris Brezillon wrote:
> >>>>>> * But
> >>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
> >>>>>> + * cause a locking order inversion between reservation_ww_class_mutex
> >>>>>> + * and fs_reclaim.
> >>>>>> + *
> >>>>>> + * This deadlock is not actually possible, because no one should
> >>>>>> + * be already holding the lock when drm_gem_shmem_free() is called.
> >>>>>> + * Unfortunately lockdep is not aware of this detail. So when the
> >>>>>> + * refcount drops to zero, don't touch the reservation lock.
> >>>>>> + */
> >>>>>> + if (shmem->got_pages_sgt &&
> >>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
> >>>>>> + drm_gem_shmem_do_release_pages_locked(shmem);
> >>>>>> + shmem->got_pages_sgt = false;
> >>>>>> }
> >>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
> >>>>> better to leak than having someone access memory it no longer owns), but
> >>>>> I think it's worth mentioning in the above comment.
> >>>>
> >>>> It's unlikely that it will be only a leak without a following up
> >>>> use-after-free. Neither is acceptable.
> >>>
> >>> Not necessarily, if you have a page leak, it could be that the GPU has
> >>> access to those pages, but doesn't need the GEM object anymore
> >>> (pages are mapped by the iommu, which doesn't need shmem->sgt or
> >>> shmem->pages after the mapping is created). Without a WARN_ON(), this
> >>> can go unnoticed and lead to memory corruptions/information leaks.
> >>>
> >>>>
> >>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up
> >>>> on a refcnt bug, but that's not worthwhile doing because drivers
> >>>> shouldn't have silly bugs.
> >>>
> >>> We definitely don't want to fix that, but we want to complain loudly
> >>> (WARN_ON()), and make sure the risk is limited (preventing memory from
> >>> being re-assigned to someone else by not freeing it).
> >>
> >> That's what the code did and continues to do here. Not exactly sure what
> >> you're trying to say. I'm going to relocate the comment in v17 to
> >> put_pages(), we can continue discussing it there if I'm missing yours point.
> >>
> >
> > I'm just saying it would be worth mentioning that we're intentionally
> > leaking memory if shmem->pages_use_count > 1. Something like:
> >
> > /**
> > * shmem->pages_use_count should be 1 when ->sgt != NULL and
> > * zero otherwise. If some users still hold a pages reference
> > * that's a bug, and we intentionally leak the pages so they
> > * can't be re-allocated to someone else while the GPU/CPU
> > * still have access to it.
> > */
> > drm_WARN_ON(drm,
> > refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0));
> > if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count))
> > drm_gem_shmem_free_pages(shmem);
>
> That may be acceptable, but only once there will a driver using this
> feature.

Which feature? That's not related to a specific feature, that's just
how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can
only be released in drm_gem_shmem_free(), because sgt users are not
refcounted and the sgt stays around until the GEM object is freed or
its pages are evicted. The only valid cases we have at the moment are:

- pages_use_count == 1 && sgt != NULL
- pages_use_count == 0

any other situations are buggy.

2023-09-14 10:49:53

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On 9/13/23 10:48, Boris Brezillon wrote:
> On Wed, 13 Sep 2023 03:56:14 +0300
> Dmitry Osipenko <[email protected]> wrote:
>
>> On 9/5/23 11:03, Boris Brezillon wrote:
>>>> * But
>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
>>>> + * cause a locking order inversion between reservation_ww_class_mutex
>>>> + * and fs_reclaim.
>>>> + *
>>>> + * This deadlock is not actually possible, because no one should
>>>> + * be already holding the lock when drm_gem_shmem_free() is called.
>>>> + * Unfortunately lockdep is not aware of this detail. So when the
>>>> + * refcount drops to zero, don't touch the reservation lock.
>>>> + */
>>>> + if (shmem->got_pages_sgt &&
>>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
>>>> + drm_gem_shmem_do_release_pages_locked(shmem);
>>>> + shmem->got_pages_sgt = false;
>>>> }
>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
>>> better to leak than having someone access memory it no longer owns), but
>>> I think it's worth mentioning in the above comment.
>>
>> It's unlikely that it will be only a leak without a following up
>> use-after-free. Neither is acceptable.
>
> Not necessarily, if you have a page leak, it could be that the GPU has
> access to those pages, but doesn't need the GEM object anymore
> (pages are mapped by the iommu, which doesn't need shmem->sgt or
> shmem->pages after the mapping is created). Without a WARN_ON(), this
> can go unnoticed and lead to memory corruptions/information leaks.
>
>>
>> The drm_gem_shmem_free() could be changed such that kernel won't blow up
>> on a refcnt bug, but that's not worthwhile doing because drivers
>> shouldn't have silly bugs.
>
> We definitely don't want to fix that, but we want to complain loudly
> (WARN_ON()), and make sure the risk is limited (preventing memory from
> being re-assigned to someone else by not freeing it).

That's what the code did and continues to do here. Not exactly sure what
you're trying to say. I'm going to relocate the comment in v17 to
put_pages(), we can continue discussing it there if I'm missing yours point.

--
Best regards,
Dmitry

2023-09-14 13:16:58

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On Thu, 14 Sep 2023 07:02:52 +0300
Dmitry Osipenko <[email protected]> wrote:

> On 9/13/23 10:48, Boris Brezillon wrote:
> > On Wed, 13 Sep 2023 03:56:14 +0300
> > Dmitry Osipenko <[email protected]> wrote:
> >
> >> On 9/5/23 11:03, Boris Brezillon wrote:
> >>>> * But
> >>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
> >>>> + * cause a locking order inversion between reservation_ww_class_mutex
> >>>> + * and fs_reclaim.
> >>>> + *
> >>>> + * This deadlock is not actually possible, because no one should
> >>>> + * be already holding the lock when drm_gem_shmem_free() is called.
> >>>> + * Unfortunately lockdep is not aware of this detail. So when the
> >>>> + * refcount drops to zero, don't touch the reservation lock.
> >>>> + */
> >>>> + if (shmem->got_pages_sgt &&
> >>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
> >>>> + drm_gem_shmem_do_release_pages_locked(shmem);
> >>>> + shmem->got_pages_sgt = false;
> >>>> }
> >>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
> >>> better to leak than having someone access memory it no longer owns), but
> >>> I think it's worth mentioning in the above comment.
> >>
> >> It's unlikely that it will be only a leak without a following up
> >> use-after-free. Neither is acceptable.
> >
> > Not necessarily, if you have a page leak, it could be that the GPU has
> > access to those pages, but doesn't need the GEM object anymore
> > (pages are mapped by the iommu, which doesn't need shmem->sgt or
> > shmem->pages after the mapping is created). Without a WARN_ON(), this
> > can go unnoticed and lead to memory corruptions/information leaks.
> >
> >>
> >> The drm_gem_shmem_free() could be changed such that kernel won't blow up
> >> on a refcnt bug, but that's not worthwhile doing because drivers
> >> shouldn't have silly bugs.
> >
> > We definitely don't want to fix that, but we want to complain loudly
> > (WARN_ON()), and make sure the risk is limited (preventing memory from
> > being re-assigned to someone else by not freeing it).
>
> That's what the code did and continues to do here. Not exactly sure what
> you're trying to say. I'm going to relocate the comment in v17 to
> put_pages(), we can continue discussing it there if I'm missing yours point.
>

I'm just saying it would be worth mentioning that we're intentionally
leaking memory if shmem->pages_use_count > 1. Something like:

/**
* shmem->pages_use_count should be 1 when ->sgt != NULL and
* zero otherwise. If some users still hold a pages reference
* that's a bug, and we intentionally leak the pages so they
* can't be re-allocated to someone else while the GPU/CPU
* still have access to it.
*/
drm_WARN_ON(drm,
refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0));
if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count))
drm_gem_shmem_free_pages(shmem);

2023-09-14 13:58:17

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On 9/14/23 11:27, Boris Brezillon wrote:
> On Thu, 14 Sep 2023 10:50:32 +0300
> Dmitry Osipenko <[email protected]> wrote:
>
>> On 9/14/23 10:36, Boris Brezillon wrote:
>>> On Thu, 14 Sep 2023 07:02:52 +0300
>>> Dmitry Osipenko <[email protected]> wrote:
>>>
>>>> On 9/13/23 10:48, Boris Brezillon wrote:
>>>>> On Wed, 13 Sep 2023 03:56:14 +0300
>>>>> Dmitry Osipenko <[email protected]> wrote:
>>>>>
>>>>>> On 9/5/23 11:03, Boris Brezillon wrote:
>>>>>>>> * But
>>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
>>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex
>>>>>>>> + * and fs_reclaim.
>>>>>>>> + *
>>>>>>>> + * This deadlock is not actually possible, because no one should
>>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called.
>>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the
>>>>>>>> + * refcount drops to zero, don't touch the reservation lock.
>>>>>>>> + */
>>>>>>>> + if (shmem->got_pages_sgt &&
>>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
>>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem);
>>>>>>>> + shmem->got_pages_sgt = false;
>>>>>>>> }
>>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
>>>>>>> better to leak than having someone access memory it no longer owns), but
>>>>>>> I think it's worth mentioning in the above comment.
>>>>>>
>>>>>> It's unlikely that it will be only a leak without a following up
>>>>>> use-after-free. Neither is acceptable.
>>>>>
>>>>> Not necessarily, if you have a page leak, it could be that the GPU has
>>>>> access to those pages, but doesn't need the GEM object anymore
>>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or
>>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this
>>>>> can go unnoticed and lead to memory corruptions/information leaks.
>>>>>
>>>>>>
>>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up
>>>>>> on a refcnt bug, but that's not worthwhile doing because drivers
>>>>>> shouldn't have silly bugs.
>>>>>
>>>>> We definitely don't want to fix that, but we want to complain loudly
>>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from
>>>>> being re-assigned to someone else by not freeing it).
>>>>
>>>> That's what the code did and continues to do here. Not exactly sure what
>>>> you're trying to say. I'm going to relocate the comment in v17 to
>>>> put_pages(), we can continue discussing it there if I'm missing yours point.
>>>>
>>>
>>> I'm just saying it would be worth mentioning that we're intentionally
>>> leaking memory if shmem->pages_use_count > 1. Something like:
>>>
>>> /**
>>> * shmem->pages_use_count should be 1 when ->sgt != NULL and
>>> * zero otherwise. If some users still hold a pages reference
>>> * that's a bug, and we intentionally leak the pages so they
>>> * can't be re-allocated to someone else while the GPU/CPU
>>> * still have access to it.
>>> */
>>> drm_WARN_ON(drm,
>>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0));
>>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count))
>>> drm_gem_shmem_free_pages(shmem);
>>
>> That may be acceptable, but only once there will a driver using this
>> feature.
>
> Which feature? That's not related to a specific feature, that's just
> how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can
> only be released in drm_gem_shmem_free(), because sgt users are not
> refcounted and the sgt stays around until the GEM object is freed or
> its pages are evicted. The only valid cases we have at the moment are:
>
> - pages_use_count == 1 && sgt != NULL
> - pages_use_count == 0
>
> any other situations are buggy.

sgt may belong to dma-buf for which pages_use_count=0, this can't be
done until sgt mess is sorted out

--
Best regards,
Dmitry

2023-09-14 14:03:02

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On 9/14/23 14:58, Boris Brezillon wrote:
> On Thu, 14 Sep 2023 14:36:23 +0300
> Dmitry Osipenko <[email protected]> wrote:
>
>> On 9/14/23 11:27, Boris Brezillon wrote:
>>> On Thu, 14 Sep 2023 10:50:32 +0300
>>> Dmitry Osipenko <[email protected]> wrote:
>>>
>>>> On 9/14/23 10:36, Boris Brezillon wrote:
>>>>> On Thu, 14 Sep 2023 07:02:52 +0300
>>>>> Dmitry Osipenko <[email protected]> wrote:
>>>>>
>>>>>> On 9/13/23 10:48, Boris Brezillon wrote:
>>>>>>> On Wed, 13 Sep 2023 03:56:14 +0300
>>>>>>> Dmitry Osipenko <[email protected]> wrote:
>>>>>>>
>>>>>>>> On 9/5/23 11:03, Boris Brezillon wrote:
>>>>>>>>>> * But
>>>>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
>>>>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex
>>>>>>>>>> + * and fs_reclaim.
>>>>>>>>>> + *
>>>>>>>>>> + * This deadlock is not actually possible, because no one should
>>>>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called.
>>>>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the
>>>>>>>>>> + * refcount drops to zero, don't touch the reservation lock.
>>>>>>>>>> + */
>>>>>>>>>> + if (shmem->got_pages_sgt &&
>>>>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
>>>>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem);
>>>>>>>>>> + shmem->got_pages_sgt = false;
>>>>>>>>>> }
>>>>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
>>>>>>>>> better to leak than having someone access memory it no longer owns), but
>>>>>>>>> I think it's worth mentioning in the above comment.
>>>>>>>>
>>>>>>>> It's unlikely that it will be only a leak without a following up
>>>>>>>> use-after-free. Neither is acceptable.
>>>>>>>
>>>>>>> Not necessarily, if you have a page leak, it could be that the GPU has
>>>>>>> access to those pages, but doesn't need the GEM object anymore
>>>>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or
>>>>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this
>>>>>>> can go unnoticed and lead to memory corruptions/information leaks.
>>>>>>>
>>>>>>>>
>>>>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up
>>>>>>>> on a refcnt bug, but that's not worthwhile doing because drivers
>>>>>>>> shouldn't have silly bugs.
>>>>>>>
>>>>>>> We definitely don't want to fix that, but we want to complain loudly
>>>>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from
>>>>>>> being re-assigned to someone else by not freeing it).
>>>>>>
>>>>>> That's what the code did and continues to do here. Not exactly sure what
>>>>>> you're trying to say. I'm going to relocate the comment in v17 to
>>>>>> put_pages(), we can continue discussing it there if I'm missing yours point.
>>>>>>
>>>>>
>>>>> I'm just saying it would be worth mentioning that we're intentionally
>>>>> leaking memory if shmem->pages_use_count > 1. Something like:
>>>>>
>>>>> /**
>>>>> * shmem->pages_use_count should be 1 when ->sgt != NULL and
>>>>> * zero otherwise. If some users still hold a pages reference
>>>>> * that's a bug, and we intentionally leak the pages so they
>>>>> * can't be re-allocated to someone else while the GPU/CPU
>>>>> * still have access to it.
>>>>> */
>>>>> drm_WARN_ON(drm,
>>>>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0));
>>>>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count))
>>>>> drm_gem_shmem_free_pages(shmem);
>>>>
>>>> That may be acceptable, but only once there will a driver using this
>>>> feature.
>>>
>>> Which feature? That's not related to a specific feature, that's just
>>> how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can
>>> only be released in drm_gem_shmem_free(), because sgt users are not
>>> refcounted and the sgt stays around until the GEM object is freed or
>>> its pages are evicted. The only valid cases we have at the moment are:
>>>
>>> - pages_use_count == 1 && sgt != NULL
>>> - pages_use_count == 0
>>>
>>> any other situations are buggy.
>>
>> sgt may belong to dma-buf for which pages_use_count=0, this can't be
>> done until sgt mess is sorted out
>
> No it can't, not in that path, because the code you're adding is in the
> if (!obj->import_branch) branch:
>
>
> if (obj->import_attach) {
> drm_prime_gem_destroy(obj, shmem->sgt);
> } else {
> ...
> // Your changes are here.
> ...

This branch is taken for the dma-buf in the prime import error code path. But yes, the pages_use_count=0 for the dma-buf and then it can be written as:

if (obj->import_attach) {
drm_prime_gem_destroy(obj, shmem->sgt);
} else {
drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count));

if (shmem->sgt && refcount_read(&shmem->pages_use_count)) {
dma_unmap_sgtable(obj->dev->dev, shmem->sgt,
DMA_BIDIRECTIONAL, 0);
sg_free_table(shmem->sgt);
kfree(shmem->sgt);

__drm_gem_shmem_put_pages(shmem);
}

drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count));

Alright, I'll check if it works as expected for fixing the error code path bug for v17

--
Best regards,
Dmitry

2023-09-14 14:40:57

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On Thu, 14 Sep 2023 14:36:23 +0300
Dmitry Osipenko <[email protected]> wrote:

> On 9/14/23 11:27, Boris Brezillon wrote:
> > On Thu, 14 Sep 2023 10:50:32 +0300
> > Dmitry Osipenko <[email protected]> wrote:
> >
> >> On 9/14/23 10:36, Boris Brezillon wrote:
> >>> On Thu, 14 Sep 2023 07:02:52 +0300
> >>> Dmitry Osipenko <[email protected]> wrote:
> >>>
> >>>> On 9/13/23 10:48, Boris Brezillon wrote:
> >>>>> On Wed, 13 Sep 2023 03:56:14 +0300
> >>>>> Dmitry Osipenko <[email protected]> wrote:
> >>>>>
> >>>>>> On 9/5/23 11:03, Boris Brezillon wrote:
> >>>>>>>> * But
> >>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
> >>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex
> >>>>>>>> + * and fs_reclaim.
> >>>>>>>> + *
> >>>>>>>> + * This deadlock is not actually possible, because no one should
> >>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called.
> >>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the
> >>>>>>>> + * refcount drops to zero, don't touch the reservation lock.
> >>>>>>>> + */
> >>>>>>>> + if (shmem->got_pages_sgt &&
> >>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
> >>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem);
> >>>>>>>> + shmem->got_pages_sgt = false;
> >>>>>>>> }
> >>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
> >>>>>>> better to leak than having someone access memory it no longer owns), but
> >>>>>>> I think it's worth mentioning in the above comment.
> >>>>>>
> >>>>>> It's unlikely that it will be only a leak without a following up
> >>>>>> use-after-free. Neither is acceptable.
> >>>>>
> >>>>> Not necessarily, if you have a page leak, it could be that the GPU has
> >>>>> access to those pages, but doesn't need the GEM object anymore
> >>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or
> >>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this
> >>>>> can go unnoticed and lead to memory corruptions/information leaks.
> >>>>>
> >>>>>>
> >>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up
> >>>>>> on a refcnt bug, but that's not worthwhile doing because drivers
> >>>>>> shouldn't have silly bugs.
> >>>>>
> >>>>> We definitely don't want to fix that, but we want to complain loudly
> >>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from
> >>>>> being re-assigned to someone else by not freeing it).
> >>>>
> >>>> That's what the code did and continues to do here. Not exactly sure what
> >>>> you're trying to say. I'm going to relocate the comment in v17 to
> >>>> put_pages(), we can continue discussing it there if I'm missing yours point.
> >>>>
> >>>
> >>> I'm just saying it would be worth mentioning that we're intentionally
> >>> leaking memory if shmem->pages_use_count > 1. Something like:
> >>>
> >>> /**
> >>> * shmem->pages_use_count should be 1 when ->sgt != NULL and
> >>> * zero otherwise. If some users still hold a pages reference
> >>> * that's a bug, and we intentionally leak the pages so they
> >>> * can't be re-allocated to someone else while the GPU/CPU
> >>> * still have access to it.
> >>> */
> >>> drm_WARN_ON(drm,
> >>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0));
> >>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count))
> >>> drm_gem_shmem_free_pages(shmem);
> >>
> >> That may be acceptable, but only once there will a driver using this
> >> feature.
> >
> > Which feature? That's not related to a specific feature, that's just
> > how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can
> > only be released in drm_gem_shmem_free(), because sgt users are not
> > refcounted and the sgt stays around until the GEM object is freed or
> > its pages are evicted. The only valid cases we have at the moment are:
> >
> > - pages_use_count == 1 && sgt != NULL
> > - pages_use_count == 0
> >
> > any other situations are buggy.
>
> sgt may belong to dma-buf for which pages_use_count=0, this can't be
> done until sgt mess is sorted out

No it can't, not in that path, because the code you're adding is in the
if (!obj->import_branch) branch:


if (obj->import_attach) {
drm_prime_gem_destroy(obj, shmem->sgt);
} else {
...
// Your changes are here.
...
}
>

2023-09-14 15:14:29

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On 9/14/23 16:27, Boris Brezillon wrote:
...
> If you added this pages_use_count > 0 check to deal with the
> 'free-partially-imported-GEM' case, I keep thinking this is not
> the right fix. You should just assume that obj->import_attach == NULL
> means not-a-prime-buffer, and then make sure
> partially-initialized-prime-GEMs have import_attach assigned (see the
> oneliner I suggested in my review of
> `[PATCH v15 01/23] drm/shmem-helper: Fix UAF in error path when
> freeing SGT of imported GEM`).

Yes, I added it to deal with the partially imported GEM. The
obj->import_attach can't be set until dma-buf is fully imported as it
also will cause trouble for the error code path, now dma-buf will be
freed two times.

>> dma_unmap_sgtable(obj->dev->dev, shmem->sgt,
>> DMA_BIDIRECTIONAL, 0);
>> sg_free_table(shmem->sgt);
>> kfree(shmem->sgt);
>>
>> __drm_gem_shmem_put_pages(shmem);
> You need to decrement pages_use_count:
>
> /* shmem->pages_use_count should be 1 when ->sgt != NULL and
> * zero otherwise. If some users still hold a pages reference
> * that's a bug, and we intentionally leak the pages so they
> * can't be re-allocated to someone else while the GPU/CPU
> * still have access to it.
> */
> if (refcount_dec_and_test(&shmem->pages_use_count))
> __drm_gem_shmem_put_pages(shmem);
>

The put_pages() itself decrements the refcnt.

I'm going back to deferring all this questionable changes for the later
times. It is not essential problem for this patchset.

--
Best regards,
Dmitry

2023-09-14 17:36:26

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On Thu, 14 Sep 2023 16:01:37 +0300
Dmitry Osipenko <[email protected]> wrote:

> On 9/14/23 14:58, Boris Brezillon wrote:
> > On Thu, 14 Sep 2023 14:36:23 +0300
> > Dmitry Osipenko <[email protected]> wrote:
> >
> >> On 9/14/23 11:27, Boris Brezillon wrote:
> >>> On Thu, 14 Sep 2023 10:50:32 +0300
> >>> Dmitry Osipenko <[email protected]> wrote:
> >>>
> >>>> On 9/14/23 10:36, Boris Brezillon wrote:
> >>>>> On Thu, 14 Sep 2023 07:02:52 +0300
> >>>>> Dmitry Osipenko <[email protected]> wrote:
> >>>>>
> >>>>>> On 9/13/23 10:48, Boris Brezillon wrote:
> >>>>>>> On Wed, 13 Sep 2023 03:56:14 +0300
> >>>>>>> Dmitry Osipenko <[email protected]> wrote:
> >>>>>>>
> >>>>>>>> On 9/5/23 11:03, Boris Brezillon wrote:
> >>>>>>>>>> * But
> >>>>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can
> >>>>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex
> >>>>>>>>>> + * and fs_reclaim.
> >>>>>>>>>> + *
> >>>>>>>>>> + * This deadlock is not actually possible, because no one should
> >>>>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called.
> >>>>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the
> >>>>>>>>>> + * refcount drops to zero, don't touch the reservation lock.
> >>>>>>>>>> + */
> >>>>>>>>>> + if (shmem->got_pages_sgt &&
> >>>>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) {
> >>>>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem);
> >>>>>>>>>> + shmem->got_pages_sgt = false;
> >>>>>>>>>> }
> >>>>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's
> >>>>>>>>> better to leak than having someone access memory it no longer owns), but
> >>>>>>>>> I think it's worth mentioning in the above comment.
> >>>>>>>>
> >>>>>>>> It's unlikely that it will be only a leak without a following up
> >>>>>>>> use-after-free. Neither is acceptable.
> >>>>>>>
> >>>>>>> Not necessarily, if you have a page leak, it could be that the GPU has
> >>>>>>> access to those pages, but doesn't need the GEM object anymore
> >>>>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or
> >>>>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this
> >>>>>>> can go unnoticed and lead to memory corruptions/information leaks.
> >>>>>>>
> >>>>>>>>
> >>>>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up
> >>>>>>>> on a refcnt bug, but that's not worthwhile doing because drivers
> >>>>>>>> shouldn't have silly bugs.
> >>>>>>>
> >>>>>>> We definitely don't want to fix that, but we want to complain loudly
> >>>>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from
> >>>>>>> being re-assigned to someone else by not freeing it).
> >>>>>>
> >>>>>> That's what the code did and continues to do here. Not exactly sure what
> >>>>>> you're trying to say. I'm going to relocate the comment in v17 to
> >>>>>> put_pages(), we can continue discussing it there if I'm missing yours point.
> >>>>>>
> >>>>>
> >>>>> I'm just saying it would be worth mentioning that we're intentionally
> >>>>> leaking memory if shmem->pages_use_count > 1. Something like:
> >>>>>
> >>>>> /**
> >>>>> * shmem->pages_use_count should be 1 when ->sgt != NULL and
> >>>>> * zero otherwise. If some users still hold a pages reference
> >>>>> * that's a bug, and we intentionally leak the pages so they
> >>>>> * can't be re-allocated to someone else while the GPU/CPU
> >>>>> * still have access to it.
> >>>>> */
> >>>>> drm_WARN_ON(drm,
> >>>>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0));
> >>>>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count))
> >>>>> drm_gem_shmem_free_pages(shmem);
> >>>>
> >>>> That may be acceptable, but only once there will a driver using this
> >>>> feature.
> >>>
> >>> Which feature? That's not related to a specific feature, that's just
> >>> how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can
> >>> only be released in drm_gem_shmem_free(), because sgt users are not
> >>> refcounted and the sgt stays around until the GEM object is freed or
> >>> its pages are evicted. The only valid cases we have at the moment are:
> >>>
> >>> - pages_use_count == 1 && sgt != NULL
> >>> - pages_use_count == 0
> >>>
> >>> any other situations are buggy.
> >>
> >> sgt may belong to dma-buf for which pages_use_count=0, this can't be
> >> done until sgt mess is sorted out
> >
> > No it can't, not in that path, because the code you're adding is in the
> > if (!obj->import_branch) branch:
> >
> >
> > if (obj->import_attach) {
> > drm_prime_gem_destroy(obj, shmem->sgt);
> > } else {
> > ...
> > // Your changes are here.
> > ...
>
> This branch is taken for the dma-buf in the prime import error code path.

I suggested a fix for this error that didn't involve adding a new flag,
but that's orthogonal to the piece of code we're discussing anyway.

> But yes, the pages_use_count=0 for the dma-buf and then it can be
> written as:
>
> if (obj->import_attach) {
> drm_prime_gem_destroy(obj, shmem->sgt);
> } else {
> drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count));
>
> if (shmem->sgt && refcount_read(&shmem->pages_use_count)) {

You should drop the '&& refcount_read(&shmem->pages_use_count)',
otherwise you'll never enter this branch (sgt allocation retained
a ref, so pages_use_count > 0 when ->sgt != NULL).

If you added this pages_use_count > 0 check to deal with the
'free-partially-imported-GEM' case, I keep thinking this is not
the right fix. You should just assume that obj->import_attach == NULL
means not-a-prime-buffer, and then make sure
partially-initialized-prime-GEMs have import_attach assigned (see the
oneliner I suggested in my review of
`[PATCH v15 01/23] drm/shmem-helper: Fix UAF in error path when
freeing SGT of imported GEM`).

> dma_unmap_sgtable(obj->dev->dev, shmem->sgt,
> DMA_BIDIRECTIONAL, 0);
> sg_free_table(shmem->sgt);
> kfree(shmem->sgt);
>
> __drm_gem_shmem_put_pages(shmem);

You need to decrement pages_use_count:

/* shmem->pages_use_count should be 1 when ->sgt != NULL and
* zero otherwise. If some users still hold a pages reference
* that's a bug, and we intentionally leak the pages so they
* can't be re-allocated to someone else while the GPU/CPU
* still have access to it.
*/
if (refcount_dec_and_test(&shmem->pages_use_count))
__drm_gem_shmem_put_pages(shmem);

> }
>
> drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count));

And now this WARN_ON() ^ should catch unexpected pages leak.

>
> Alright, I'll check if it works as expected for fixing the error code path bug for v17
>

2023-09-14 17:45:29

by Boris Brezillon

[permalink] [raw]
Subject: Re: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker

On Thu, 14 Sep 2023 15:27:03 +0200
Boris Brezillon <[email protected]> wrote:

> You should drop the '&& refcount_read(&shmem->pages_use_count)',
> otherwise you'll never enter this branch (sgt allocation retained
> a ref, so pages_use_count > 0 when ->sgt != NULL).

Sorry for the brain fart. You can drop this extra test because its
redundant (->sgt != NULL implies pages_use_count > 0), but it shouldn't
prevent you from entering the branch.