A BO's RSS is updated every time new pages are allocated on demand and
mapped for the object in the GPU page fault IRQ handler, but only for
heap buffers.
The reason this is unnecessary for non-heap buffers is that they are mapped
onto the GPU's VA space and backed by physical memory in their entirety at
BO creation time.
This calculation is unnecessary for imported PRIME objects, since heap
buffers cannot be exported by our driver, and the actual BO RSS size is the
one reported in its attached dmabuf structure.
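
For context, the new .rss hook is meant to be consumed by the DRM core's
fdinfo memory-stats reporting. The snippet below is only an illustrative
sketch of how such a consumer could use the callback; the helper name and
the accounting shape are assumptions for the sake of the example, not the
actual drm_file.c implementation:

#include <drm/drm_gem.h>

/*
 * Illustrative only: account one BO's contribution to the per-client
 * totals, preferring the driver-provided RSS over the virtual size.
 */
static void example_account_bo(struct drm_gem_object *obj,
			       size_t *resident, size_t *total)
{
	/* Every BO contributes its full (virtual) size to the total. */
	*total += obj->size;

	/*
	 * Resident memory is whatever the driver reports through the new
	 * callback (panfrost_gem_rss() below); objects without the hook
	 * fall back to being counted as fully resident.
	 */
	if (obj->funcs && obj->funcs->rss)
		*resident += obj->funcs->rss(obj);
	else
		*resident += obj->size;
}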
Signed-off-by: Adrián Larumbe <[email protected]>
Reviewed-by: Boris Brezillon <[email protected]>
---
drivers/gpu/drm/panfrost/panfrost_gem.c | 15 +++++++++++++++
drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +++++
drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 +
3 files changed, 21 insertions(+)
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 7d8f83d20539..4365434b48db 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -208,6 +208,20 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
return res;
}
+static size_t panfrost_gem_rss(struct drm_gem_object *obj)
+{
+ struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+
+ if (bo->is_heap) {
+ return bo->heap_rss_size;
+ } else if (bo->base.pages) {
+ WARN_ON(bo->heap_rss_size);
+ return bo->base.base.size;
+ } else {
+ return 0;
+ }
+}
+
static const struct drm_gem_object_funcs panfrost_gem_funcs = {
.free = panfrost_gem_free_object,
.open = panfrost_gem_open,
@@ -220,6 +234,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
.vunmap = drm_gem_shmem_object_vunmap,
.mmap = drm_gem_shmem_object_mmap,
.status = panfrost_gem_status,
+ .rss = panfrost_gem_rss,
.vm_ops = &drm_gem_shmem_vm_ops,
};
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index ad2877eeeccd..13c0a8149c3a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -36,6 +36,11 @@ struct panfrost_gem_object {
*/
atomic_t gpu_usecount;
+ /*
+ * Object chunk size currently mapped onto physical memory
+ */
+ size_t heap_rss_size;
+
bool noexec :1;
bool is_heap :1;
};
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index d54d4e7b2195..7b1490cdaa48 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -522,6 +522,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
bomapping->active = true;
+ bo->heap_rss_size += SZ_2M;
dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
--
2.42.0
On 14/09/2023 23:38, Adrián Larumbe wrote:
> BO's RSS is updated every time new pages are allocated on demand and mapped
> for the object at GPU page fault's IRQ handler, but only for heap buffers.
> The reason this is unnecessary for non-heap buffers is that they are mapped
> onto the GPU's VA space and backed by physical memory in their entirety at
> BO creation time.
>
> This calculation is unnecessary for imported PRIME objects, since heap
> buffers cannot be exported by our driver, and the actual BO RSS size is the
> one reported in its attached dmabuf structure.
>
> Signed-off-by: Adrián Larumbe <[email protected]>
> Reviewed-by: Boris Brezillon <[email protected]>
Am I missing something, or are we missing a way of resetting
heap_rss_size when the shrinker purges? It looks like after several
grow/purge cycles, heap_rss_size could actually grow to be larger than
the BO which is clearly wrong.
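To put rough numbers on it (purely illustrative): with the fault handler
growing heap_rss_size by 2 MiB per fault, a 4 MiB heap BO that is fully
faulted in, purged, and then fully faulted in again would end up with
heap_rss_size = 8 MiB against a 4 MiB object.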
Steve
> ---
> drivers/gpu/drm/panfrost/panfrost_gem.c | 15 +++++++++++++++
> drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +++++
> drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 +
> 3 files changed, 21 insertions(+)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> index 7d8f83d20539..4365434b48db 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> @@ -208,6 +208,20 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
> return res;
> }
>
> +static size_t panfrost_gem_rss(struct drm_gem_object *obj)
> +{
> + struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> +
> + if (bo->is_heap) {
> + return bo->heap_rss_size;
> + } else if (bo->base.pages) {
> + WARN_ON(bo->heap_rss_size);
> + return bo->base.base.size;
> + } else {
> + return 0;
> + }
> +}
> +
> static const struct drm_gem_object_funcs panfrost_gem_funcs = {
> .free = panfrost_gem_free_object,
> .open = panfrost_gem_open,
> @@ -220,6 +234,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
> .vunmap = drm_gem_shmem_object_vunmap,
> .mmap = drm_gem_shmem_object_mmap,
> .status = panfrost_gem_status,
> + .rss = panfrost_gem_rss,
> .vm_ops = &drm_gem_shmem_vm_ops,
> };
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> index ad2877eeeccd..13c0a8149c3a 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> @@ -36,6 +36,11 @@ struct panfrost_gem_object {
> */
> atomic_t gpu_usecount;
>
> + /*
> + * Object chunk size currently mapped onto physical memory
> + */
> + size_t heap_rss_size;
> +
> bool noexec :1;
> bool is_heap :1;
> };
> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> index d54d4e7b2195..7b1490cdaa48 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> @@ -522,6 +522,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
> IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
>
> bomapping->active = true;
> + bo->heap_rss_size += SZ_2M;
>
> dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
>
On Mon, 18 Sep 2023 11:01:43 +0100
Steven Price <[email protected]> wrote:
> On 14/09/2023 23:38, Adrián Larumbe wrote:
> > BO's RSS is updated every time new pages are allocated on demand and mapped
> > for the object at GPU page fault's IRQ handler, but only for heap buffers.
> > The reason this is unnecessary for non-heap buffers is that they are mapped
> > onto the GPU's VA space and backed by physical memory in their entirety at
> > BO creation time.
> >
> > This calculation is unnecessary for imported PRIME objects, since heap
> > buffers cannot be exported by our driver, and the actual BO RSS size is the
> > one reported in its attached dmabuf structure.
> >
> > Signed-off-by: Adrián Larumbe <[email protected]>
> > Reviewed-by: Boris Brezillon <[email protected]>
>
> Am I missing something, or are we missing a way of resetting
> heap_rss_size when the shrinker purges? It looks like after several
> grow/purge cycles, heap_rss_size could actually grow to be larger than
> the BO which is clearly wrong.
Didn't even consider this case since we don't flag heap BOs purgeable
in mesa(panfrost), but let's assume we did. If the BO is purged, I'd
expect the core to report 0MB of resident memory anyway. And purged BOs
are not supposed to be re-used: if MADVISE(WILL_NEED) returns
retained=false, they should be destroyed. Not 100% sure this is
enforced everywhere though (we might actually be missing tests to make
sure users don't pass purged BOs to jobs, or to make sure the
alloc-on-fault logic doesn't try to grow a purged GEM).
If we want to implement transparent BO swap{out,in} (Dmitry's
patchset), that'd be a different story, and we'll indeed have to set
heap_rss_size back to zero on eviction.
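
A hypothetical sketch of that (the helper name and its placement are
assumptions, not part of this patch or of any existing eviction path):

#include "panfrost_gem.h"

/*
 * If heap BOs ever become evictable (e.g. transparent swap-out), the
 * fault-grown counter has to be cleared wherever the backing pages are
 * dropped, otherwise panfrost_gem_rss() keeps reporting memory the
 * object no longer holds.
 */
static void example_panfrost_heap_evicted(struct panfrost_gem_object *bo)
{
	/* All backing pages are gone, so nothing is resident any more. */
	bo->heap_rss_size = 0;
}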
>
> Steve
>
> > ---
> > drivers/gpu/drm/panfrost/panfrost_gem.c | 15 +++++++++++++++
> > drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +++++
> > drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 +
> > 3 files changed, 21 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > index 7d8f83d20539..4365434b48db 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
> > @@ -208,6 +208,20 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
> > return res;
> > }
> >
> > +static size_t panfrost_gem_rss(struct drm_gem_object *obj)
> > +{
> > + struct panfrost_gem_object *bo = to_panfrost_bo(obj);
> > +
> > + if (bo->is_heap) {
> > + return bo->heap_rss_size;
> > + } else if (bo->base.pages) {
> > + WARN_ON(bo->heap_rss_size);
> > + return bo->base.base.size;
> > + } else {
> > + return 0;
> > + }
> > +}
> > +
> > static const struct drm_gem_object_funcs panfrost_gem_funcs = {
> > .free = panfrost_gem_free_object,
> > .open = panfrost_gem_open,
> > @@ -220,6 +234,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
> > .vunmap = drm_gem_shmem_object_vunmap,
> > .mmap = drm_gem_shmem_object_mmap,
> > .status = panfrost_gem_status,
> > + .rss = panfrost_gem_rss,
> > .vm_ops = &drm_gem_shmem_vm_ops,
> > };
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
> > index ad2877eeeccd..13c0a8149c3a 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
> > @@ -36,6 +36,11 @@ struct panfrost_gem_object {
> > */
> > atomic_t gpu_usecount;
> >
> > + /*
> > + * Object chunk size currently mapped onto physical memory
> > + */
> > + size_t heap_rss_size;
> > +
> > bool noexec :1;
> > bool is_heap :1;
> > };
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > index d54d4e7b2195..7b1490cdaa48 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
> > @@ -522,6 +522,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
> > IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
> >
> > bomapping->active = true;
> > + bo->heap_rss_size += SZ_2M;
> >
> > dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
> >
>
On 18/09/2023 11:32, Boris Brezillon wrote:
> On Mon, 18 Sep 2023 11:01:43 +0100
> Steven Price <[email protected]> wrote:
>
>> On 14/09/2023 23:38, Adrián Larumbe wrote:
>>> BO's RSS is updated every time new pages are allocated on demand and mapped
>>> for the object at GPU page fault's IRQ handler, but only for heap buffers.
>>> The reason this is unnecessary for non-heap buffers is that they are mapped
>>> onto the GPU's VA space and backed by physical memory in their entirety at
>>> BO creation time.
>>>
>>> This calculation is unnecessary for imported PRIME objects, since heap
>>> buffers cannot be exported by our driver, and the actual BO RSS size is the
>>> one reported in its attached dmabuf structure.
>>>
>>> Signed-off-by: Adrián Larumbe <[email protected]>
>>> Reviewed-by: Boris Brezillon <[email protected]>
>>
>> Am I missing something, or are we missing a way of resetting
>> heap_rss_size when the shrinker purges? It looks like after several
>> grow/purge cycles, heap_rss_size could actually grow to be larger than
>> the BO which is clearly wrong.
>
> Didn't even consider this case since we don't flag heap BOs purgeable
> in mesa(panfrost), but let's assume we did. If the BO is purged, I'd
> expect the core to report 0MB of resident memory anyway. And purged BOs
> are not supposed to be re-used: if MADVISE(WILL_NEED) returns
> retained=false, they should be destroyed. Not 100% sure this is
> enforced everywhere though (we might actually be missing tests to make
> sure users don't pass purged BOs to jobs, or to make sure the
> alloc-on-fault logic doesn't try to grow a purged GEM).
>
> If we want to implement transparent BO swap{out,in} (Dmitry's
> patchset), that'd be a different story, and we'll indeed have to set
> heap_rss_size back to zero on eviction.
Ah, ok. So we should be safe as things stand - but this is something to
bear in mind in the future. Looking more closely at the code, I can see
that madvise(WILL_NEED) on a purged BO will fail with retained=false
(drm_gem_shmem_madvise() only updates the state if shmem->madv >= 0).
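
Roughly, paraphrasing (not quoting) the shmem helper, the logic boils
down to something like this, which is why a BO whose madv was set to a
negative value by the purge can never be flipped back to retained:

#include <drm/drm_gem_shmem_helper.h>

/* Paraphrased sketch of the madvise state update, for illustration. */
static int example_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv)
{
	if (shmem->madv >= 0)		/* only non-purged BOs are updated */
		shmem->madv = madv;

	return shmem->madv >= 0;	/* "retained" as reported to userspace */
}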
In which case:
Reviewed-by: Steven Price <[email protected]>
>>
>> Steve
>>
>>> ---
>>> drivers/gpu/drm/panfrost/panfrost_gem.c | 15 +++++++++++++++
>>> drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +++++
>>> drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 +
>>> 3 files changed, 21 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
>>> index 7d8f83d20539..4365434b48db 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_gem.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
>>> @@ -208,6 +208,20 @@ static enum drm_gem_object_status panfrost_gem_status(struct drm_gem_object *obj
>>> return res;
>>> }
>>>
>>> +static size_t panfrost_gem_rss(struct drm_gem_object *obj)
>>> +{
>>> + struct panfrost_gem_object *bo = to_panfrost_bo(obj);
>>> +
>>> + if (bo->is_heap) {
>>> + return bo->heap_rss_size;
>>> + } else if (bo->base.pages) {
>>> + WARN_ON(bo->heap_rss_size);
>>> + return bo->base.base.size;
>>> + } else {
>>> + return 0;
>>> + }
>>> +}
>>> +
>>> static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>>> .free = panfrost_gem_free_object,
>>> .open = panfrost_gem_open,
>>> @@ -220,6 +234,7 @@ static const struct drm_gem_object_funcs panfrost_gem_funcs = {
>>> .vunmap = drm_gem_shmem_object_vunmap,
>>> .mmap = drm_gem_shmem_object_mmap,
>>> .status = panfrost_gem_status,
>>> + .rss = panfrost_gem_rss,
>>> .vm_ops = &drm_gem_shmem_vm_ops,
>>> };
>>>
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
>>> index ad2877eeeccd..13c0a8149c3a 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_gem.h
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
>>> @@ -36,6 +36,11 @@ struct panfrost_gem_object {
>>> */
>>> atomic_t gpu_usecount;
>>>
>>> + /*
>>> + * Object chunk size currently mapped onto physical memory
>>> + */
>>> + size_t heap_rss_size;
>>> +
>>> bool noexec :1;
>>> bool is_heap :1;
>>> };
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>> index d54d4e7b2195..7b1490cdaa48 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
>>> @@ -522,6 +522,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
>>> IOMMU_WRITE | IOMMU_READ | IOMMU_NOEXEC, sgt);
>>>
>>> bomapping->active = true;
>>> + bo->heap_rss_size += SZ_2M;
>>>
>>> dev_dbg(pfdev->dev, "mapped page fault @ AS%d %llx", as, addr);
>>>
>>
>