2024-04-04 14:00:41

by Adrián Larumbe

[permalink] [raw]
Subject: [PATCH] drm/panfrost: Show overall GPU usage stats through sysfs knob

This changeset is heavily inspired by commit 509433d8146c ("drm/v3d: Expose
the total GPU usage stats on sysfs"). The point is making broader GPU
occupancy numbers available through the sysfs interface, so that for every
job slot, its number of processed jobs and total processing time are
displayed.

Cc: Boris Brezillon <[email protected]>
Cc: Christopher Healy <[email protected]>
Signed-off-by: Adrián Larumbe <[email protected]>
---
drivers/gpu/drm/panfrost/panfrost_device.h | 5 +++
drivers/gpu/drm/panfrost/panfrost_drv.c | 49 ++++++++++++++++++++--
drivers/gpu/drm/panfrost/panfrost_job.c | 17 +++++++-
drivers/gpu/drm/panfrost/panfrost_job.h | 3 ++
4 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index cffcb0ac7c11..1d343351c634 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -169,6 +169,11 @@ struct panfrost_engine_usage {
unsigned long long cycles[NUM_JOB_SLOTS];
};

+struct panfrost_slot_usage {
+ u64 enabled_ns;
+ u64 jobs_sent;
+};
+
struct panfrost_file_priv {
struct panfrost_device *pfdev;

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index ef9f6c0716d5..6afcde66270f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -8,6 +8,7 @@
#include <linux/pagemap.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/sched/clock.h>
#include <drm/panfrost_drm.h>
#include <drm/drm_drv.h>
#include <drm/drm_ioctl.h>
@@ -524,6 +525,10 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
};

+static const char * const engine_names[] = {
+ "fragment", "vertex-tiler", "compute-only"
+};
+
static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
struct panfrost_file_priv *panfrost_priv,
struct drm_printer *p)
@@ -543,10 +548,6 @@ static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
* job spent on the GPU.
*/

- static const char * const engine_names[] = {
- "fragment", "vertex-tiler", "compute-only"
- };
-
BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);

for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
@@ -716,8 +717,48 @@ static ssize_t profiling_store(struct device *dev,

static DEVICE_ATTR_RW(profiling);

+static ssize_t
+gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct panfrost_device *pfdev = dev_get_drvdata(dev);
+ struct panfrost_slot_usage stats;
+ u64 timestamp = local_clock();
+ ssize_t len = 0;
+ unsigned int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
+
+ len += sysfs_emit(buf, "queue timestamp jobs runtime\n");
+ len += sysfs_emit_at(buf, len, "-------------------------------------------------\n");
+
+ for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
+
+ stats = get_slot_stats(pfdev, i);
+
+ /*
+ * Each line will display the slot name, timestamp, the number
+ * of jobs handled by that engine and runtime, as shown below:
+ *
+ * queue timestamp jobs runtime
+ * -------------------------------------------------
+ * fragment 12252943467507 638 1184747640
+ * vertex-tiler 12252943467507 636 121663838
+ *
+ */
+ len += sysfs_emit_at(buf, len, "%-13s%-17llu%-12llu%llu\n",
+ engine_names[i],
+ timestamp,
+ stats.jobs_sent,
+ stats.enabled_ns);
+ }
+
+ return len;
+}
+static DEVICE_ATTR_RO(gpu_stats);
+
static struct attribute *panfrost_attrs[] = {
&dev_attr_profiling.attr,
+ &dev_attr_gpu_stats.attr,
NULL,
};

diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index a61ef0af9a4e..4c779e6f4cb0 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -31,6 +31,8 @@ struct panfrost_queue_state {
struct drm_gpu_scheduler sched;
u64 fence_context;
u64 emit_seqno;
+
+ struct panfrost_slot_usage stats;
};

struct panfrost_job_slot {
@@ -160,15 +162,20 @@ panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)

WARN_ON(!job);
if (job->is_profiled) {
+ u64 job_time = ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
+
if (job->engine_usage) {
- job->engine_usage->elapsed_ns[slot] +=
- ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
+ job->engine_usage->elapsed_ns[slot] += job_time;
job->engine_usage->cycles[slot] +=
panfrost_cycle_counter_read(pfdev) - job->start_cycles;
}
+
panfrost_cycle_counter_put(job->pfdev);
+ pfdev->js->queue[slot].stats.enabled_ns += job_time;
}

+ pfdev->js->queue[slot].stats.jobs_sent++;
+
pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
pfdev->jobs[slot][1] = NULL;

@@ -987,3 +994,9 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev)

return true;
}
+
+struct panfrost_slot_usage
+get_slot_stats(struct panfrost_device *pfdev, unsigned int slot)
+{
+ return pfdev->js->queue[slot].stats;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index ec581b97852b..e9e2c9db0526 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -50,4 +50,7 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev);
void panfrost_job_suspend_irq(struct panfrost_device *pfdev);
int panfrost_job_is_idle(struct panfrost_device *pfdev);

+struct panfrost_slot_usage
+get_slot_stats(struct panfrost_device *pfdev, unsigned int slot);
+
#endif

base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
prerequisite-patch-id: 06ac397dd381984bfbff2a7661320c4f05470635
--
2.44.0



2024-04-04 14:33:18

by Maíra Canal

[permalink] [raw]
Subject: Re: [PATCH] drm/panfrost: Show overall GPU usage stats through sysfs knob

On 4/4/24 11:00, Adrián Larumbe wrote:
> This changeset is heavily inspired by commit 509433d8146c ("drm/v3d: Expose
> the total GPU usage stats on sysfs"). The point is making broader GPU
> occupancy numbers available through the sysfs interface, so that for every
> job slot, its number of processed jobs and total processing time are
> displayed.

Shouldn't we make this sysfs interface a generic DRM interface?
Something that would be standard for all drivers and that we could
integrate into gputop in the future.

Best Regards,
- Maíra

>
> Cc: Boris Brezillon <[email protected]>
> Cc: Christopher Healy <[email protected]>
> Signed-off-by: Adrián Larumbe <[email protected]>
> ---
> drivers/gpu/drm/panfrost/panfrost_device.h | 5 +++
> drivers/gpu/drm/panfrost/panfrost_drv.c | 49 ++++++++++++++++++++--
> drivers/gpu/drm/panfrost/panfrost_job.c | 17 +++++++-
> drivers/gpu/drm/panfrost/panfrost_job.h | 3 ++
> 4 files changed, 68 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index cffcb0ac7c11..1d343351c634 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -169,6 +169,11 @@ struct panfrost_engine_usage {
> unsigned long long cycles[NUM_JOB_SLOTS];
> };
>
> +struct panfrost_slot_usage {
> + u64 enabled_ns;
> + u64 jobs_sent;
> +};
> +
> struct panfrost_file_priv {
> struct panfrost_device *pfdev;
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index ef9f6c0716d5..6afcde66270f 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -8,6 +8,7 @@
> #include <linux/pagemap.h>
> #include <linux/platform_device.h>
> #include <linux/pm_runtime.h>
> +#include <linux/sched/clock.h>
> #include <drm/panfrost_drm.h>
> #include <drm/drm_drv.h>
> #include <drm/drm_ioctl.h>
> @@ -524,6 +525,10 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
> PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
> };
>
> +static const char * const engine_names[] = {
> + "fragment", "vertex-tiler", "compute-only"
> +};
> +
> static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
> struct panfrost_file_priv *panfrost_priv,
> struct drm_printer *p)
> @@ -543,10 +548,6 @@ static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
> * job spent on the GPU.
> */
>
> - static const char * const engine_names[] = {
> - "fragment", "vertex-tiler", "compute-only"
> - };
> -
> BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
>
> for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> @@ -716,8 +717,48 @@ static ssize_t profiling_store(struct device *dev,
>
> static DEVICE_ATTR_RW(profiling);
>
> +static ssize_t
> +gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> + struct panfrost_device *pfdev = dev_get_drvdata(dev);
> + struct panfrost_slot_usage stats;
> + u64 timestamp = local_clock();
> + ssize_t len = 0;
> + unsigned int i;
> +
> + BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
> +
> + len += sysfs_emit(buf, "queue timestamp jobs runtime\n");
> + len += sysfs_emit_at(buf, len, "-------------------------------------------------\n");
> +
> + for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> +
> + stats = get_slot_stats(pfdev, i);
> +
> + /*
> + * Each line will display the slot name, timestamp, the number
> + * of jobs handled by that engine and runtime, as shown below:
> + *
> + * queue timestamp jobs runtime
> + * -------------------------------------------------
> + * fragment 12252943467507 638 1184747640
> + * vertex-tiler 12252943467507 636 121663838
> + *
> + */
> + len += sysfs_emit_at(buf, len, "%-13s%-17llu%-12llu%llu\n",
> + engine_names[i],
> + timestamp,
> + stats.jobs_sent,
> + stats.enabled_ns);
> + }
> +
> + return len;
> +}
> +static DEVICE_ATTR_RO(gpu_stats);
> +
> static struct attribute *panfrost_attrs[] = {
> &dev_attr_profiling.attr,
> + &dev_attr_gpu_stats.attr,
> NULL,
> };
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index a61ef0af9a4e..4c779e6f4cb0 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -31,6 +31,8 @@ struct panfrost_queue_state {
> struct drm_gpu_scheduler sched;
> u64 fence_context;
> u64 emit_seqno;
> +
> + struct panfrost_slot_usage stats;
> };
>
> struct panfrost_job_slot {
> @@ -160,15 +162,20 @@ panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
>
> WARN_ON(!job);
> if (job->is_profiled) {
> + u64 job_time = ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
> +
> if (job->engine_usage) {
> - job->engine_usage->elapsed_ns[slot] +=
> - ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
> + job->engine_usage->elapsed_ns[slot] += job_time;
> job->engine_usage->cycles[slot] +=
> panfrost_cycle_counter_read(pfdev) - job->start_cycles;
> }
> +
> panfrost_cycle_counter_put(job->pfdev);
> + pfdev->js->queue[slot].stats.enabled_ns += job_time;
> }
>
> + pfdev->js->queue[slot].stats.jobs_sent++;
> +
> pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
> pfdev->jobs[slot][1] = NULL;
>
> @@ -987,3 +994,9 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev)
>
> return true;
> }
> +
> +struct panfrost_slot_usage
> +get_slot_stats(struct panfrost_device *pfdev, unsigned int slot)
> +{
> + return pfdev->js->queue[slot].stats;
> +}
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index ec581b97852b..e9e2c9db0526 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -50,4 +50,7 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev);
> void panfrost_job_suspend_irq(struct panfrost_device *pfdev);
> int panfrost_job_is_idle(struct panfrost_device *pfdev);
>
> +struct panfrost_slot_usage
> +get_slot_stats(struct panfrost_device *pfdev, unsigned int slot);
> +
> #endif
>
> base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
> prerequisite-patch-id: 06ac397dd381984bfbff2a7661320c4f05470635

2024-04-04 21:30:57

by Adrián Larumbe

[permalink] [raw]
Subject: Re: [PATCH] drm/panfrost: Show overall GPU usage stats through sysfs knob

On 04.04.2024 11:31, Maíra Canal wrote:
> On 4/4/24 11:00, Adrián Larumbe wrote:
> > This changeset is heavily inspired by commit 509433d8146c ("drm/v3d: Expose
> > the total GPU usage stats on sysfs"). The point is making broader GPU
> > occupancy numbers available through the sysfs interface, so that for every
> > job slot, its number of processed jobs and total processing time are
> > displayed.
>
> Shouldn't we make this sysfs interface a generic DRM interface?
> Something that would be standard for all drivers and that we could
> integrate into gputop in the future.

I think the best way to generalise this sysfs knob would be to create a DRM
class attribute somewhere in drivers/gpu/drm/drm_sysfs.c and then adding a new
function to 'struct drm_driver' that would return a structure with the relevant
information (execution units and their names, number of processed jobs, etc).

What that information would exactly be is up to debate, I guess, since different
drivers might be interested in showing different bits of information.

Laying that down is important because the sysfs file would become part of the
device class API.

I might come up with a new RFC patch series that does precisely that, at least
for v3d and Panfrost, and maybe other people could pitch in with the sort of
things they'd like to see for other drivers?

Cheers,
Adrian

> Best Regards,
> - Maíra
>
> >
> > Cc: Boris Brezillon <[email protected]>
> > Cc: Christopher Healy <[email protected]>
> > Signed-off-by: Adrián Larumbe <[email protected]>
> > ---
> > drivers/gpu/drm/panfrost/panfrost_device.h | 5 +++
> > drivers/gpu/drm/panfrost/panfrost_drv.c | 49 ++++++++++++++++++++--
> > drivers/gpu/drm/panfrost/panfrost_job.c | 17 +++++++-
> > drivers/gpu/drm/panfrost/panfrost_job.h | 3 ++
> > 4 files changed, 68 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> > index cffcb0ac7c11..1d343351c634 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> > @@ -169,6 +169,11 @@ struct panfrost_engine_usage {
> > unsigned long long cycles[NUM_JOB_SLOTS];
> > };
> > +struct panfrost_slot_usage {
> > + u64 enabled_ns;
> > + u64 jobs_sent;
> > +};
> > +
> > struct panfrost_file_priv {
> > struct panfrost_device *pfdev;
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index ef9f6c0716d5..6afcde66270f 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -8,6 +8,7 @@
> > #include <linux/pagemap.h>
> > #include <linux/platform_device.h>
> > #include <linux/pm_runtime.h>
> > +#include <linux/sched/clock.h>
> > #include <drm/panfrost_drm.h>
> > #include <drm/drm_drv.h>
> > #include <drm/drm_ioctl.h>
> > @@ -524,6 +525,10 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
> > PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
> > };
> > +static const char * const engine_names[] = {
> > + "fragment", "vertex-tiler", "compute-only"
> > +};
> > +
> > static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
> > struct panfrost_file_priv *panfrost_priv,
> > struct drm_printer *p)
> > @@ -543,10 +548,6 @@ static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
> > * job spent on the GPU.
> > */
> > - static const char * const engine_names[] = {
> > - "fragment", "vertex-tiler", "compute-only"
> > - };
> > -
> > BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
> > for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> > @@ -716,8 +717,48 @@ static ssize_t profiling_store(struct device *dev,
> > static DEVICE_ATTR_RW(profiling);
> > +static ssize_t
> > +gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf)
> > +{
> > + struct panfrost_device *pfdev = dev_get_drvdata(dev);
> > + struct panfrost_slot_usage stats;
> > + u64 timestamp = local_clock();
> > + ssize_t len = 0;
> > + unsigned int i;
> > +
> > + BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
> > +
> > + len += sysfs_emit(buf, "queue timestamp jobs runtime\n");
> > + len += sysfs_emit_at(buf, len, "-------------------------------------------------\n");
> > +
> > + for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> > +
> > + stats = get_slot_stats(pfdev, i);
> > +
> > + /*
> > + * Each line will display the slot name, timestamp, the number
> > + * of jobs handled by that engine and runtime, as shown below:
> > + *
> > + * queue timestamp jobs runtime
> > + * -------------------------------------------------
> > + * fragment 12252943467507 638 1184747640
> > + * vertex-tiler 12252943467507 636 121663838
> > + *
> > + */
> > + len += sysfs_emit_at(buf, len, "%-13s%-17llu%-12llu%llu\n",
> > + engine_names[i],
> > + timestamp,
> > + stats.jobs_sent,
> > + stats.enabled_ns);
> > + }
> > +
> > + return len;
> > +}
> > +static DEVICE_ATTR_RO(gpu_stats);
> > +
> > static struct attribute *panfrost_attrs[] = {
> > &dev_attr_profiling.attr,
> > + &dev_attr_gpu_stats.attr,
> > NULL,
> > };
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index a61ef0af9a4e..4c779e6f4cb0 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -31,6 +31,8 @@ struct panfrost_queue_state {
> > struct drm_gpu_scheduler sched;
> > u64 fence_context;
> > u64 emit_seqno;
> > +
> > + struct panfrost_slot_usage stats;
> > };
> > struct panfrost_job_slot {
> > @@ -160,15 +162,20 @@ panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
> > WARN_ON(!job);
> > if (job->is_profiled) {
> > + u64 job_time = ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
> > +
> > if (job->engine_usage) {
> > - job->engine_usage->elapsed_ns[slot] +=
> > - ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
> > + job->engine_usage->elapsed_ns[slot] += job_time;
> > job->engine_usage->cycles[slot] +=
> > panfrost_cycle_counter_read(pfdev) - job->start_cycles;
> > }
> > +
> > panfrost_cycle_counter_put(job->pfdev);
> > + pfdev->js->queue[slot].stats.enabled_ns += job_time;
> > }
> > + pfdev->js->queue[slot].stats.jobs_sent++;
> > +
> > pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
> > pfdev->jobs[slot][1] = NULL;
> > @@ -987,3 +994,9 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev)
> > return true;
> > }
> > +
> > +struct panfrost_slot_usage
> > +get_slot_stats(struct panfrost_device *pfdev, unsigned int slot)
> > +{
> > + return pfdev->js->queue[slot].stats;
> > +}
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> > index ec581b97852b..e9e2c9db0526 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> > @@ -50,4 +50,7 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev);
> > void panfrost_job_suspend_irq(struct panfrost_device *pfdev);
> > int panfrost_job_is_idle(struct panfrost_device *pfdev);
> > +struct panfrost_slot_usage
> > +get_slot_stats(struct panfrost_device *pfdev, unsigned int slot);
> > +
> > #endif
> >
> > base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
> > prerequisite-patch-id: 06ac397dd381984bfbff2a7661320c4f05470635

2024-04-08 11:08:09

by Maíra Canal

[permalink] [raw]
Subject: Re: [PATCH] drm/panfrost: Show overall GPU usage stats through sysfs knob

On 4/4/24 18:30, Adrián Larumbe wrote:
> On 04.04.2024 11:31, Maíra Canal wrote:
>> On 4/4/24 11:00, Adrián Larumbe wrote:
>>> This changeset is heavily inspired by commit 509433d8146c ("drm/v3d: Expose
>>> the total GPU usage stats on sysfs"). The point is making broader GPU
>>> occupancy numbers available through the sysfs interface, so that for every
>>> job slot, its number of processed jobs and total processing time are
>>> displayed.
>>
>> Shouldn't we make this sysfs interface a generic DRM interface?
>> Something that would be standard for all drivers and that we could
>> integrate into gputop in the future.
>
> I think the best way to generalise this sysfs knob would be to create a DRM
> class attribute somewhere in drivers/gpu/drm/drm_sysfs.c and then adding a new
> function to 'struct drm_driver' that would return a structure with the relevant
> information (execution units and their names, number of processed jobs, etc).

These is exactly what I was thinking about.

>
> What that information would exactly be is up to debate, I guess, since different
> drivers might be interested in showing different bits of information.

I believe we can start with the requirements of V3D and Panfrost and
them, expand from it.

>
> Laying that down is important because the sysfs file would become part of the
> device class API.

My PoV: it is important, but not completly tragic if we don't get it
perfect. Just like fdinfo, which is evolving.

>
> I might come up with a new RFC patch series that does precisely that, at least
> for v3d and Panfrost, and maybe other people could pitch in with the sort of
> things they'd like to see for other drivers?

Yeah, this would be a great idea. Please, CC me on this series.

Best Regards,
- Maíra

>
> Cheers,
> Adrian
>
>> Best Regards,
>> - Maíra
>>
>>>
>>> Cc: Boris Brezillon <[email protected]>
>>> Cc: Christopher Healy <[email protected]>
>>> Signed-off-by: Adrián Larumbe <[email protected]>
>>> ---
>>> drivers/gpu/drm/panfrost/panfrost_device.h | 5 +++
>>> drivers/gpu/drm/panfrost/panfrost_drv.c | 49 ++++++++++++++++++++--
>>> drivers/gpu/drm/panfrost/panfrost_job.c | 17 +++++++-
>>> drivers/gpu/drm/panfrost/panfrost_job.h | 3 ++
>>> 4 files changed, 68 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
>>> index cffcb0ac7c11..1d343351c634 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>>> @@ -169,6 +169,11 @@ struct panfrost_engine_usage {
>>> unsigned long long cycles[NUM_JOB_SLOTS];
>>> };
>>> +struct panfrost_slot_usage {
>>> + u64 enabled_ns;
>>> + u64 jobs_sent;
>>> +};
>>> +
>>> struct panfrost_file_priv {
>>> struct panfrost_device *pfdev;
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
>>> index ef9f6c0716d5..6afcde66270f 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
>>> @@ -8,6 +8,7 @@
>>> #include <linux/pagemap.h>
>>> #include <linux/platform_device.h>
>>> #include <linux/pm_runtime.h>
>>> +#include <linux/sched/clock.h>
>>> #include <drm/panfrost_drm.h>
>>> #include <drm/drm_drv.h>
>>> #include <drm/drm_ioctl.h>
>>> @@ -524,6 +525,10 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
>>> PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
>>> };
>>> +static const char * const engine_names[] = {
>>> + "fragment", "vertex-tiler", "compute-only"
>>> +};
>>> +
>>> static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
>>> struct panfrost_file_priv *panfrost_priv,
>>> struct drm_printer *p)
>>> @@ -543,10 +548,6 @@ static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
>>> * job spent on the GPU.
>>> */
>>> - static const char * const engine_names[] = {
>>> - "fragment", "vertex-tiler", "compute-only"
>>> - };
>>> -
>>> BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
>>> for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
>>> @@ -716,8 +717,48 @@ static ssize_t profiling_store(struct device *dev,
>>> static DEVICE_ATTR_RW(profiling);
>>> +static ssize_t
>>> +gpu_stats_show(struct device *dev, struct device_attribute *attr, char *buf)
>>> +{
>>> + struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>> + struct panfrost_slot_usage stats;
>>> + u64 timestamp = local_clock();
>>> + ssize_t len = 0;
>>> + unsigned int i;
>>> +
>>> + BUILD_BUG_ON(ARRAY_SIZE(engine_names) != NUM_JOB_SLOTS);
>>> +
>>> + len += sysfs_emit(buf, "queue timestamp jobs runtime\n");
>>> + len += sysfs_emit_at(buf, len, "-------------------------------------------------\n");
>>> +
>>> + for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {
>>> +
>>> + stats = get_slot_stats(pfdev, i);
>>> +
>>> + /*
>>> + * Each line will display the slot name, timestamp, the number
>>> + * of jobs handled by that engine and runtime, as shown below:
>>> + *
>>> + * queue timestamp jobs runtime
>>> + * -------------------------------------------------
>>> + * fragment 12252943467507 638 1184747640
>>> + * vertex-tiler 12252943467507 636 121663838
>>> + *
>>> + */
>>> + len += sysfs_emit_at(buf, len, "%-13s%-17llu%-12llu%llu\n",
>>> + engine_names[i],
>>> + timestamp,
>>> + stats.jobs_sent,
>>> + stats.enabled_ns);
>>> + }
>>> +
>>> + return len;
>>> +}
>>> +static DEVICE_ATTR_RO(gpu_stats);
>>> +
>>> static struct attribute *panfrost_attrs[] = {
>>> &dev_attr_profiling.attr,
>>> + &dev_attr_gpu_stats.attr,
>>> NULL,
>>> };
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
>>> index a61ef0af9a4e..4c779e6f4cb0 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
>>> @@ -31,6 +31,8 @@ struct panfrost_queue_state {
>>> struct drm_gpu_scheduler sched;
>>> u64 fence_context;
>>> u64 emit_seqno;
>>> +
>>> + struct panfrost_slot_usage stats;
>>> };
>>> struct panfrost_job_slot {
>>> @@ -160,15 +162,20 @@ panfrost_dequeue_job(struct panfrost_device *pfdev, int slot)
>>> WARN_ON(!job);
>>> if (job->is_profiled) {
>>> + u64 job_time = ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
>>> +
>>> if (job->engine_usage) {
>>> - job->engine_usage->elapsed_ns[slot] +=
>>> - ktime_to_ns(ktime_sub(ktime_get(), job->start_time));
>>> + job->engine_usage->elapsed_ns[slot] += job_time;
>>> job->engine_usage->cycles[slot] +=
>>> panfrost_cycle_counter_read(pfdev) - job->start_cycles;
>>> }
>>> +
>>> panfrost_cycle_counter_put(job->pfdev);
>>> + pfdev->js->queue[slot].stats.enabled_ns += job_time;
>>> }
>>> + pfdev->js->queue[slot].stats.jobs_sent++;
>>> +
>>> pfdev->jobs[slot][0] = pfdev->jobs[slot][1];
>>> pfdev->jobs[slot][1] = NULL;
>>> @@ -987,3 +994,9 @@ int panfrost_job_is_idle(struct panfrost_device *pfdev)
>>> return true;
>>> }
>>> +
>>> +struct panfrost_slot_usage
>>> +get_slot_stats(struct panfrost_device *pfdev, unsigned int slot)
>>> +{
>>> + return pfdev->js->queue[slot].stats;
>>> +}
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
>>> index ec581b97852b..e9e2c9db0526 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
>>> @@ -50,4 +50,7 @@ void panfrost_job_enable_interrupts(struct panfrost_device *pfdev);
>>> void panfrost_job_suspend_irq(struct panfrost_device *pfdev);
>>> int panfrost_job_is_idle(struct panfrost_device *pfdev);
>>> +struct panfrost_slot_usage
>>> +get_slot_stats(struct panfrost_device *pfdev, unsigned int slot);
>>> +
>>> #endif
>>>
>>> base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
>>> prerequisite-patch-id: 06ac397dd381984bfbff2a7661320c4f05470635