Up to this day, all fdinfo-based GPU profilers must traverse the entire
/proc directory structure to find open DRM clients with fdinfo file
descriptors. This is inefficient and time-consuming.
This patch adds a new device class attribute that will install a sysfs file
per DRM device, which can be queried by profilers to get a list of PIDs for
their open clients. This file isn't human-readable, and it's meant to be
queried only by GPU profilers like gputop and nvtop.
Cc: Boris Brezillon <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Cc: Christopher Healy <[email protected]>
Signed-off-by: Adrián Larumbe <[email protected]>
---
drivers/gpu/drm/drm_internal.h | 2 +-
drivers/gpu/drm/drm_privacy_screen.c | 2 +-
drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
3 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 2215baef9a3e..9a399b03d11c 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
void drm_master_internal_release(struct drm_device *dev);
/* drm_sysfs.c */
-extern struct class *drm_class;
+extern struct class drm_class;
int drm_sysfs_init(void);
void drm_sysfs_destroy(void);
diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
index 6cc39e30781f..2fbd24ba5818 100644
--- a/drivers/gpu/drm/drm_privacy_screen.c
+++ b/drivers/gpu/drm/drm_privacy_screen.c
@@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
mutex_init(&priv->lock);
BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
- priv->dev.class = drm_class;
+ priv->dev.class = &drm_class;
priv->dev.type = &drm_privacy_screen_type;
priv->dev.parent = parent;
priv->dev.release = drm_privacy_screen_device_release;
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index a953f69a34b6..56ca9e22c720 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
.name = "drm_connector",
};
-struct class *drm_class;
-
#ifdef CONFIG_ACPI
static bool drm_connector_acpi_bus_match(struct device *dev)
{
@@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
+static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
+{
+ struct drm_minor *minor = cd->driver_data;
+ struct drm_device *ddev = minor->dev;
+ struct drm_file *priv;
+ ssize_t offset = 0;
+ void *pid_buf;
+
+ if (minor->type != DRM_MINOR_RENDER)
+ return 0;
+
+ pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!pid_buf)
+ return 0;
+
+ mutex_lock(&ddev->filelist_mutex);
+ list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
+ struct pid *pid;
+
+ if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
+ break;
+
+ rcu_read_lock();
+ pid = rcu_dereference(priv->pid);
+ (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
+ rcu_read_unlock();
+
+ offset += sizeof(pid_t);
+ }
+ mutex_unlock(&ddev->filelist_mutex);
+
+ if (offset < PAGE_SIZE)
+ (*(pid_t *)(pid_buf + offset)) = 0;
+
+ memcpy(buf, pid_buf, offset);
+
+ kvfree(pid_buf);
+
+ return offset;
+
+}
+static DEVICE_ATTR_RO(clients);
+
+static struct attribute *drm_device_attrs[] = {
+ &dev_attr_clients.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(drm_device);
+
+struct class drm_class = {
+ .name = "drm",
+ .dev_groups = drm_device_groups,
+};
+
+static bool drm_class_initialised;
+
/**
* drm_sysfs_init - initialize sysfs helpers
*
@@ -142,18 +196,19 @@ int drm_sysfs_init(void)
{
int err;
- drm_class = class_create("drm");
- if (IS_ERR(drm_class))
- return PTR_ERR(drm_class);
+ err = class_register(&drm_class);
+ if (err)
+ return err;
- err = class_create_file(drm_class, &class_attr_version.attr);
+ err = class_create_file(&drm_class, &class_attr_version.attr);
if (err) {
- class_destroy(drm_class);
- drm_class = NULL;
+ class_destroy(&drm_class);
return err;
}
- drm_class->devnode = drm_devnode;
+ drm_class.devnode = drm_devnode;
+
+ drm_class_initialised = true;
drm_sysfs_acpi_register();
return 0;
@@ -166,12 +221,12 @@ int drm_sysfs_init(void)
*/
void drm_sysfs_destroy(void)
{
- if (IS_ERR_OR_NULL(drm_class))
+ if (!drm_class_initialised)
return;
drm_sysfs_acpi_unregister();
- class_remove_file(drm_class, &class_attr_version.attr);
- class_destroy(drm_class);
- drm_class = NULL;
+ class_remove_file(&drm_class, &class_attr_version.attr);
+ class_destroy(&drm_class);
+ drm_class_initialised = false;
}
static void drm_sysfs_release(struct device *dev)
@@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
return -ENOMEM;
device_initialize(kdev);
- kdev->class = drm_class;
+ kdev->class = &drm_class;
kdev->type = &drm_sysfs_device_connector;
kdev->parent = dev->primary->kdev;
kdev->groups = connector_dev_groups;
@@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
minor_str = "card%d";
kdev->devt = MKDEV(DRM_MAJOR, minor->index);
- kdev->class = drm_class;
+ kdev->class = &drm_class;
kdev->type = &drm_sysfs_device_minor;
}
@@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
*/
int drm_class_device_register(struct device *dev)
{
- if (!drm_class || IS_ERR(drm_class))
+ if (!drm_class_initialised)
return -ENOENT;
- dev->class = drm_class;
+ dev->class = &drm_class;
return device_register(dev);
}
EXPORT_SYMBOL_GPL(drm_class_device_register);
base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
--
2.44.0
On Wed, Apr 3, 2024 at 11:37 AM Adrián Larumbe
<[email protected]> wrote:
>
> Up to this day, all fdinfo-based GPU profilers must traverse the entire
> /proc directory structure to find open DRM clients with fdinfo file
> descriptors. This is inefficient and time-consuming.
>
> This patch adds a new device class attribute that will install a sysfs file
> per DRM device, which can be queried by profilers to get a list of PIDs for
> their open clients. This file isn't human-readable, and it's meant to be
> queried only by GPU profilers like gputop and nvtop.
>
> Cc: Boris Brezillon <[email protected]>
> Cc: Tvrtko Ursulin <[email protected]>
> Cc: Christopher Healy <[email protected]>
> Signed-off-by: Adrián Larumbe <[email protected]>
It does seem like a good idea.. idk if there is some precedent to
prefer binary vs ascii in sysfs, but having a way to avoid walking
_all_ processes is a good idea.
BR,
-R
> ---
> drivers/gpu/drm/drm_internal.h | 2 +-
> drivers/gpu/drm/drm_privacy_screen.c | 2 +-
> drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
> 3 files changed, 74 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index 2215baef9a3e..9a399b03d11c 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
> void drm_master_internal_release(struct drm_device *dev);
>
> /* drm_sysfs.c */
> -extern struct class *drm_class;
> +extern struct class drm_class;
>
> int drm_sysfs_init(void);
> void drm_sysfs_destroy(void);
> diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
> index 6cc39e30781f..2fbd24ba5818 100644
> --- a/drivers/gpu/drm/drm_privacy_screen.c
> +++ b/drivers/gpu/drm/drm_privacy_screen.c
> @@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
> mutex_init(&priv->lock);
> BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
>
> - priv->dev.class = drm_class;
> + priv->dev.class = &drm_class;
> priv->dev.type = &drm_privacy_screen_type;
> priv->dev.parent = parent;
> priv->dev.release = drm_privacy_screen_device_release;
> diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
> index a953f69a34b6..56ca9e22c720 100644
> --- a/drivers/gpu/drm/drm_sysfs.c
> +++ b/drivers/gpu/drm/drm_sysfs.c
> @@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
> .name = "drm_connector",
> };
>
> -struct class *drm_class;
> -
> #ifdef CONFIG_ACPI
> static bool drm_connector_acpi_bus_match(struct device *dev)
> {
> @@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
>
> static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
>
> +static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
> +{
> + struct drm_minor *minor = cd->driver_data;
> + struct drm_device *ddev = minor->dev;
> + struct drm_file *priv;
> + ssize_t offset = 0;
> + void *pid_buf;
> +
> + if (minor->type != DRM_MINOR_RENDER)
> + return 0;
> +
> + pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
> + if (!pid_buf)
> + return 0;
> +
> + mutex_lock(&ddev->filelist_mutex);
> + list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
> + struct pid *pid;
> +
> + if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
> + break;
> +
> + rcu_read_lock();
> + pid = rcu_dereference(priv->pid);
> + (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
> + rcu_read_unlock();
> +
> + offset += sizeof(pid_t);
> + }
> + mutex_unlock(&ddev->filelist_mutex);
> +
> + if (offset < PAGE_SIZE)
> + (*(pid_t *)(pid_buf + offset)) = 0;
> +
> + memcpy(buf, pid_buf, offset);
> +
> + kvfree(pid_buf);
> +
> + return offset;
> +
> +}
> +static DEVICE_ATTR_RO(clients);
> +
> +static struct attribute *drm_device_attrs[] = {
> + &dev_attr_clients.attr,
> + NULL,
> +};
> +ATTRIBUTE_GROUPS(drm_device);
> +
> +struct class drm_class = {
> + .name = "drm",
> + .dev_groups = drm_device_groups,
> +};
> +
> +static bool drm_class_initialised;
> +
> /**
> * drm_sysfs_init - initialize sysfs helpers
> *
> @@ -142,18 +196,19 @@ int drm_sysfs_init(void)
> {
> int err;
>
> - drm_class = class_create("drm");
> - if (IS_ERR(drm_class))
> - return PTR_ERR(drm_class);
> + err = class_register(&drm_class);
> + if (err)
> + return err;
>
> - err = class_create_file(drm_class, &class_attr_version.attr);
> + err = class_create_file(&drm_class, &class_attr_version.attr);
> if (err) {
> - class_destroy(drm_class);
> - drm_class = NULL;
> + class_destroy(&drm_class);
> return err;
> }
>
> - drm_class->devnode = drm_devnode;
> + drm_class.devnode = drm_devnode;
> +
> + drm_class_initialised = true;
>
> drm_sysfs_acpi_register();
> return 0;
> @@ -166,12 +221,12 @@ int drm_sysfs_init(void)
> */
> void drm_sysfs_destroy(void)
> {
> - if (IS_ERR_OR_NULL(drm_class))
> + if (!drm_class_initialised)
> return;
> drm_sysfs_acpi_unregister();
> - class_remove_file(drm_class, &class_attr_version.attr);
> - class_destroy(drm_class);
> - drm_class = NULL;
> + class_remove_file(&drm_class, &class_attr_version.attr);
> + class_destroy(&drm_class);
> + drm_class_initialised = false;
> }
>
> static void drm_sysfs_release(struct device *dev)
> @@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
> return -ENOMEM;
>
> device_initialize(kdev);
> - kdev->class = drm_class;
> + kdev->class = &drm_class;
> kdev->type = &drm_sysfs_device_connector;
> kdev->parent = dev->primary->kdev;
> kdev->groups = connector_dev_groups;
> @@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> minor_str = "card%d";
>
> kdev->devt = MKDEV(DRM_MAJOR, minor->index);
> - kdev->class = drm_class;
> + kdev->class = &drm_class;
> kdev->type = &drm_sysfs_device_minor;
> }
>
> @@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> */
> int drm_class_device_register(struct device *dev)
> {
> - if (!drm_class || IS_ERR(drm_class))
> + if (!drm_class_initialised)
> return -ENOENT;
>
> - dev->class = drm_class;
> + dev->class = &drm_class;
> return device_register(dev);
> }
> EXPORT_SYMBOL_GPL(drm_class_device_register);
>
> base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
> --
> 2.44.0
>
On 05/04/2024 18:59, Rob Clark wrote:
> On Wed, Apr 3, 2024 at 11:37 AM Adrián Larumbe
> <[email protected]> wrote:
>>
>> Up to this day, all fdinfo-based GPU profilers must traverse the entire
>> /proc directory structure to find open DRM clients with fdinfo file
>> descriptors. This is inefficient and time-consuming.
>>
>> This patch adds a new device class attribute that will install a sysfs file
>> per DRM device, which can be queried by profilers to get a list of PIDs for
>> their open clients. This file isn't human-readable, and it's meant to be
>> queried only by GPU profilers like gputop and nvtop.
>>
>> Cc: Boris Brezillon <[email protected]>
>> Cc: Tvrtko Ursulin <[email protected]>
>> Cc: Christopher Healy <[email protected]>
>> Signed-off-by: Adrián Larumbe <[email protected]>
>
> It does seem like a good idea.. idk if there is some precedent to
> prefer binary vs ascii in sysfs, but having a way to avoid walking
> _all_ processes is a good idea.
I naturally second that it is a needed feature, but I do not think
binary format is justified. AFAIR it should be used for things like
hw/fw standardised tables or firmware images, not when exporting a
simple list of PIDs. It also precludes easy shell/script access and the
benefit of avoiding parsing a short list is I suspect completely dwarfed
by needing to parse all the related fdinfo etc.
>> ---
>> drivers/gpu/drm/drm_internal.h | 2 +-
>> drivers/gpu/drm/drm_privacy_screen.c | 2 +-
>> drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
>> 3 files changed, 74 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>> index 2215baef9a3e..9a399b03d11c 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
>> void drm_master_internal_release(struct drm_device *dev);
>>
>> /* drm_sysfs.c */
>> -extern struct class *drm_class;
>> +extern struct class drm_class;
>>
>> int drm_sysfs_init(void);
>> void drm_sysfs_destroy(void);
>> diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
>> index 6cc39e30781f..2fbd24ba5818 100644
>> --- a/drivers/gpu/drm/drm_privacy_screen.c
>> +++ b/drivers/gpu/drm/drm_privacy_screen.c
>> @@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
>> mutex_init(&priv->lock);
>> BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
>>
>> - priv->dev.class = drm_class;
>> + priv->dev.class = &drm_class;
>> priv->dev.type = &drm_privacy_screen_type;
>> priv->dev.parent = parent;
>> priv->dev.release = drm_privacy_screen_device_release;
>> diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
>> index a953f69a34b6..56ca9e22c720 100644
>> --- a/drivers/gpu/drm/drm_sysfs.c
>> +++ b/drivers/gpu/drm/drm_sysfs.c
>> @@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
>> .name = "drm_connector",
>> };
>>
>> -struct class *drm_class;
>> -
>> #ifdef CONFIG_ACPI
>> static bool drm_connector_acpi_bus_match(struct device *dev)
>> {
>> @@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
>>
>> static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
>>
>> +static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
>> +{
>> + struct drm_minor *minor = cd->driver_data;
>> + struct drm_device *ddev = minor->dev;
>> + struct drm_file *priv;
>> + ssize_t offset = 0;
>> + void *pid_buf;
>> +
>> + if (minor->type != DRM_MINOR_RENDER)
>> + return 0;
Why this?
>> +
>> + pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
I don't quite get the kvmalloc for just one page (or why even a temporay
buffer and not write into buf directly?).
>> + if (!pid_buf)
>> + return 0;
>> +
>> + mutex_lock(&ddev->filelist_mutex);
>> + list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
>> + struct pid *pid;
>> +
>> + if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
>> + break;
Feels bad.. I would suggest exploring implementing a read callback
(instead of show) and handling arbitrary size output.
>> +
>> + rcu_read_lock();
>> + pid = rcu_dereference(priv->pid);
>> + (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
>> + rcu_read_unlock();
>> +
>> + offset += sizeof(pid_t);
>> + }
>> + mutex_unlock(&ddev->filelist_mutex);
>> +
>> + if (offset < PAGE_SIZE)
>> + (*(pid_t *)(pid_buf + offset)) = 0;
Either NULL terminated or PAGE_SIZE/sizeof(pid) entries and not NULL
terminated feels weird. If I got that right.
For me everything points towards going for text output.
>> +
>> + memcpy(buf, pid_buf, offset);
>> +
>> + kvfree(pid_buf);
>> +
>> + return offset;
>> +
>> +}
>> +static DEVICE_ATTR_RO(clients);
Shouldn't BIN_ATTR_RO be used for binary files in sysfs?
Regards,
Tvrtko
P.S. Or maybe it is time for drmfs? Where each client gets a directory
and drivers can populate files. Such as per client logging streams and
whatnot.
>> +
>> +static struct attribute *drm_device_attrs[] = {
>> + &dev_attr_clients.attr,
>> + NULL,
>> +};
>> +ATTRIBUTE_GROUPS(drm_device);
>> +
>> +struct class drm_class = {
>> + .name = "drm",
>> + .dev_groups = drm_device_groups,
>> +};
>> +
>> +static bool drm_class_initialised;
>> +
>> /**
>> * drm_sysfs_init - initialize sysfs helpers
>> *
>> @@ -142,18 +196,19 @@ int drm_sysfs_init(void)
>> {
>> int err;
>>
>> - drm_class = class_create("drm");
>> - if (IS_ERR(drm_class))
>> - return PTR_ERR(drm_class);
>> + err = class_register(&drm_class);
>> + if (err)
>> + return err;
>>
>> - err = class_create_file(drm_class, &class_attr_version.attr);
>> + err = class_create_file(&drm_class, &class_attr_version.attr);
>> if (err) {
>> - class_destroy(drm_class);
>> - drm_class = NULL;
>> + class_destroy(&drm_class);
>> return err;
>> }
>>
>> - drm_class->devnode = drm_devnode;
>> + drm_class.devnode = drm_devnode;
>> +
>> + drm_class_initialised = true;
>>
>> drm_sysfs_acpi_register();
>> return 0;
>> @@ -166,12 +221,12 @@ int drm_sysfs_init(void)
>> */
>> void drm_sysfs_destroy(void)
>> {
>> - if (IS_ERR_OR_NULL(drm_class))
>> + if (!drm_class_initialised)
>> return;
>> drm_sysfs_acpi_unregister();
>> - class_remove_file(drm_class, &class_attr_version.attr);
>> - class_destroy(drm_class);
>> - drm_class = NULL;
>> + class_remove_file(&drm_class, &class_attr_version.attr);
>> + class_destroy(&drm_class);
>> + drm_class_initialised = false;
>> }
>>
>> static void drm_sysfs_release(struct device *dev)
>> @@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
>> return -ENOMEM;
>>
>> device_initialize(kdev);
>> - kdev->class = drm_class;
>> + kdev->class = &drm_class;
>> kdev->type = &drm_sysfs_device_connector;
>> kdev->parent = dev->primary->kdev;
>> kdev->groups = connector_dev_groups;
>> @@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>> minor_str = "card%d";
>>
>> kdev->devt = MKDEV(DRM_MAJOR, minor->index);
>> - kdev->class = drm_class;
>> + kdev->class = &drm_class;
>> kdev->type = &drm_sysfs_device_minor;
>> }
>>
>> @@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>> */
>> int drm_class_device_register(struct device *dev)
>> {
>> - if (!drm_class || IS_ERR(drm_class))
>> + if (!drm_class_initialised)
>> return -ENOENT;
>>
>> - dev->class = drm_class;
>> + dev->class = &drm_class;
>> return device_register(dev);
>> }
>> EXPORT_SYMBOL_GPL(drm_class_device_register);
>>
>> base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
>> --
>> 2.44.0
>>
Hi Tvrtko,
On 15.04.2024 13:50, Tvrtko Ursulin wrote:
>
> On 05/04/2024 18:59, Rob Clark wrote:
> > On Wed, Apr 3, 2024 at 11:37 AM Adrián Larumbe
> > <[email protected]> wrote:
> > >
> > > Up to this day, all fdinfo-based GPU profilers must traverse the entire
> > > /proc directory structure to find open DRM clients with fdinfo file
> > > descriptors. This is inefficient and time-consuming.
> > >
> > > This patch adds a new device class attribute that will install a sysfs file
> > > per DRM device, which can be queried by profilers to get a list of PIDs for
> > > their open clients. This file isn't human-readable, and it's meant to be
> > > queried only by GPU profilers like gputop and nvtop.
> > >
> > > Cc: Boris Brezillon <[email protected]>
> > > Cc: Tvrtko Ursulin <[email protected]>
> > > Cc: Christopher Healy <[email protected]>
> > > Signed-off-by: Adrián Larumbe <[email protected]>
> >
> > It does seem like a good idea.. idk if there is some precedent to
> > prefer binary vs ascii in sysfs, but having a way to avoid walking
> > _all_ processes is a good idea.
>
> I naturally second that it is a needed feature, but I do not think binary
> format is justified. AFAIR it should be used for things like hw/fw
> standardised tables or firmware images, not when exporting a simple list of
> PIDs. It also precludes easy shell/script access and the benefit of avoiding
> parsing a short list is I suspect completely dwarfed by needing to parse all
> the related fdinfo etc.
I'd rather keep it as a binary file for the sake of easily parsing the number
list on the client side, in gputop or nvtop. For textual access, there's already
a debugfs file that presents the same information, so I thought it was best not
to duplicate that functionality and restrict sysfs to serving the very specific
use case of UM profilers having to access the DRM client list.
I should mention I did something controversial here, which is a semantically
binary attribute through the regular attribute interface. I guess if I keep it
as a binary attribute in the end, I should switch over to the binary attribute
API.
Another reason why I implemented it as a binary file is that we can only send
back at most a whole page. If a PID takes 4 bytes, that's usually 1024 clients
at most, which is probably enough for any UM profiler, but will decrease even
more if we turn it into an ASCII readable file.
I did some research into sysfs binary attributes, and while some sources mention that
it's often used for dumping or loading of driver FW, none of them claim it cannot
be used for other purposes.
> > > ---
> > > drivers/gpu/drm/drm_internal.h | 2 +-
> > > drivers/gpu/drm/drm_privacy_screen.c | 2 +-
> > > drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
> > > 3 files changed, 74 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> > > index 2215baef9a3e..9a399b03d11c 100644
> > > --- a/drivers/gpu/drm/drm_internal.h
> > > +++ b/drivers/gpu/drm/drm_internal.h
> > > @@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
> > > void drm_master_internal_release(struct drm_device *dev);
> > >
> > > /* drm_sysfs.c */
> > > -extern struct class *drm_class;
> > > +extern struct class drm_class;
> > >
> > > int drm_sysfs_init(void);
> > > void drm_sysfs_destroy(void);
> > > diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
> > > index 6cc39e30781f..2fbd24ba5818 100644
> > > --- a/drivers/gpu/drm/drm_privacy_screen.c
> > > +++ b/drivers/gpu/drm/drm_privacy_screen.c
> > > @@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
> > > mutex_init(&priv->lock);
> > > BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
> > >
> > > - priv->dev.class = drm_class;
> > > + priv->dev.class = &drm_class;
> > > priv->dev.type = &drm_privacy_screen_type;
> > > priv->dev.parent = parent;
> > > priv->dev.release = drm_privacy_screen_device_release;
> > > diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
> > > index a953f69a34b6..56ca9e22c720 100644
> > > --- a/drivers/gpu/drm/drm_sysfs.c
> > > +++ b/drivers/gpu/drm/drm_sysfs.c
> > > @@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
> > > .name = "drm_connector",
> > > };
> > >
> > > -struct class *drm_class;
> > > -
> > > #ifdef CONFIG_ACPI
> > > static bool drm_connector_acpi_bus_match(struct device *dev)
> > > {
> > > @@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
> > >
> > > static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
> > >
> > > +static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
> > > +{
> > > + struct drm_minor *minor = cd->driver_data;
> > > + struct drm_device *ddev = minor->dev;
> > > + struct drm_file *priv;
> > > + ssize_t offset = 0;
> > > + void *pid_buf;
> > > +
> > > + if (minor->type != DRM_MINOR_RENDER)
> > > + return 0;
>
> Why this?
I return nothing in case of a non-render node because we don't want display drivers
to confuse UM GPU profilers.
> > > +
> > > + pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
>
> I don't quite get the kvmalloc for just one page (or why even a temporay
> buffer and not write into buf directly?).
Should've used kmalloc, you're right. Or else I could just write everything straight into 'buf'.
> > > + if (!pid_buf)
> > > + return 0;
> > > +
> > > + mutex_lock(&ddev->filelist_mutex);
> > > + list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
> > > + struct pid *pid;
> > > +
> > > + if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
> > > + break;
>
> Feels bad.. I would suggest exploring implementing a read callback (instead of
> show) and handling arbitrary size output.
I think regular class attributes can only implement show() and set(). For a more complex
interface, I would have to turn it into an actual binary attribute, and that would be the only
choice if we want the list of clients to be of arbitrary size.
> > > +
> > > + rcu_read_lock();
> > > + pid = rcu_dereference(priv->pid);
> > > + (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
> > > + rcu_read_unlock();
> > > +
> > > + offset += sizeof(pid_t);
> > > + }
> > > + mutex_unlock(&ddev->filelist_mutex);
> > > +
> > > + if (offset < PAGE_SIZE)
> > > + (*(pid_t *)(pid_buf + offset)) = 0;
>
> Either NULL terminated or PAGE_SIZE/sizeof(pid) entries and not NULL
> terminated feels weird. If I got that right.
>
> For me everything points towards going for text output.
Yes, I know it might sound weird, but my reasoning was: either there are PAGE_SIZE/sizeof(pid) entries
and the file isn't NULL terminated (which should be picked up by clients as being one page worth
of data, the sysfs attribute maximum transfer unit), or else there aren't enough entries to fill
a page and after the last one there's a NULL entry.
> > > +
> > > + memcpy(buf, pid_buf, offset);
> > > +
> > > + kvfree(pid_buf);
> > > +
> > > + return offset;
> > > +
> > > +}
> > > +static DEVICE_ATTR_RO(clients);
>
> Shouldn't BIN_ATTR_RO be used for binary files in sysfs?
Like I said above, I sort of faked a binary attribute through the regular sysfs attr API,
which is most likely a bad idea.
> Regards,
>
> Tvrtko
>
> P.S. Or maybe it is time for drmfs? Where each client gets a directory and
> drivers can populate files. Such as per client logging streams and whatnot.
Yes, but maybe this is something we can discuss in depth in an RFC at a later time?
> > > +
> > > +static struct attribute *drm_device_attrs[] = {
> > > + &dev_attr_clients.attr,
> > > + NULL,
> > > +};
> > > +ATTRIBUTE_GROUPS(drm_device);
> > > +
> > > +struct class drm_class = {
> > > + .name = "drm",
> > > + .dev_groups = drm_device_groups,
> > > +};
> > > +
> > > +static bool drm_class_initialised;
> > > +
> > > /**
> > > * drm_sysfs_init - initialize sysfs helpers
> > > *
> > > @@ -142,18 +196,19 @@ int drm_sysfs_init(void)
> > > {
> > > int err;
> > >
> > > - drm_class = class_create("drm");
> > > - if (IS_ERR(drm_class))
> > > - return PTR_ERR(drm_class);
> > > + err = class_register(&drm_class);
> > > + if (err)
> > > + return err;
> > >
> > > - err = class_create_file(drm_class, &class_attr_version.attr);
> > > + err = class_create_file(&drm_class, &class_attr_version.attr);
> > > if (err) {
> > > - class_destroy(drm_class);
> > > - drm_class = NULL;
> > > + class_destroy(&drm_class);
> > > return err;
> > > }
> > >
> > > - drm_class->devnode = drm_devnode;
> > > + drm_class.devnode = drm_devnode;
> > > +
> > > + drm_class_initialised = true;
> > >
> > > drm_sysfs_acpi_register();
> > > return 0;
> > > @@ -166,12 +221,12 @@ int drm_sysfs_init(void)
> > > */
> > > void drm_sysfs_destroy(void)
> > > {
> > > - if (IS_ERR_OR_NULL(drm_class))
> > > + if (!drm_class_initialised)
> > > return;
> > > drm_sysfs_acpi_unregister();
> > > - class_remove_file(drm_class, &class_attr_version.attr);
> > > - class_destroy(drm_class);
> > > - drm_class = NULL;
> > > + class_remove_file(&drm_class, &class_attr_version.attr);
> > > + class_destroy(&drm_class);
> > > + drm_class_initialised = false;
> > > }
> > >
> > > static void drm_sysfs_release(struct device *dev)
> > > @@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
> > > return -ENOMEM;
> > >
> > > device_initialize(kdev);
> > > - kdev->class = drm_class;
> > > + kdev->class = &drm_class;
> > > kdev->type = &drm_sysfs_device_connector;
> > > kdev->parent = dev->primary->kdev;
> > > kdev->groups = connector_dev_groups;
> > > @@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> > > minor_str = "card%d";
> > >
> > > kdev->devt = MKDEV(DRM_MAJOR, minor->index);
> > > - kdev->class = drm_class;
> > > + kdev->class = &drm_class;
> > > kdev->type = &drm_sysfs_device_minor;
> > > }
> > >
> > > @@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> > > */
> > > int drm_class_device_register(struct device *dev)
> > > {
> > > - if (!drm_class || IS_ERR(drm_class))
> > > + if (!drm_class_initialised)
> > > return -ENOENT;
> > >
> > > - dev->class = drm_class;
> > > + dev->class = &drm_class;
> > > return device_register(dev);
> > > }
> > > EXPORT_SYMBOL_GPL(drm_class_device_register);
> > >
> > > base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
> > > --
> > > 2.44.0
> > >
Adrian Larumbe
On Wed, May 01, 2024 at 04:58:05PM GMT, Tvrtko Ursulin wrote:
>
>Hi,
>
>On 24/04/2024 15:48, Adrián Larumbe wrote:
>>Hi Tvrtko,
>>
>>On 15.04.2024 13:50, Tvrtko Ursulin wrote:
>>>
>>>On 05/04/2024 18:59, Rob Clark wrote:
>>>>On Wed, Apr 3, 2024 at 11:37 AM Adrián Larumbe
>>>><[email protected]> wrote:
>>>>>
>>>>>Up to this day, all fdinfo-based GPU profilers must traverse the entire
>>>>>/proc directory structure to find open DRM clients with fdinfo file
>>>>>descriptors. This is inefficient and time-consuming.
>>>>>
>>>>>This patch adds a new device class attribute that will install a sysfs file
>>>>>per DRM device, which can be queried by profilers to get a list of PIDs for
>>>>>their open clients. This file isn't human-readable, and it's meant to be
>>>>>queried only by GPU profilers like gputop and nvtop.
>>>>>
>>>>>Cc: Boris Brezillon <[email protected]>
>>>>>Cc: Tvrtko Ursulin <[email protected]>
>>>>>Cc: Christopher Healy <[email protected]>
>>>>>Signed-off-by: Adrián Larumbe <[email protected]>
>>>>
>>>>It does seem like a good idea.. idk if there is some precedent to
>>>>prefer binary vs ascii in sysfs, but having a way to avoid walking
>>>>_all_ processes is a good idea.
>>>
>>>I naturally second that it is a needed feature, but I do not think binary
>>>format is justified. AFAIR it should be used for things like hw/fw
>>>standardised tables or firmware images, not when exporting a simple list of
>>>PIDs. It also precludes easy shell/script access and the benefit of avoiding
>>>parsing a short list is I suspect completely dwarfed by needing to parse all
>>>the related fdinfo etc.
>>
>>I'd rather keep it as a binary file for the sake of easily parsing the number
>>list on the client side, in gputop or nvtop. For textual access, there's already
>>a debugfs file that presents the same information, so I thought it was best not
>>to duplicate that functionality and restrict sysfs to serving the very specific
>>use case of UM profilers having to access the DRM client list.
>>
>>I should mention I did something controversial here, which is a semantically
>>binary attribute through the regular attribute interface. I guess if I keep it
>>as a binary attribute in the end, I should switch over to the binary attribute
>>API.
>>
>>Another reason why I implemented it as a binary file is that we can only send
>>back at most a whole page. If a PID takes 4 bytes, that's usually 1024 clients
>>at most, which is probably enough for any UM profiler, but will decrease even
>>more if we turn it into an ASCII readable file.
>
>I'm afraid I still think there is no reason for a binary file, even
>less so artificially limited to 1024 clients. Any consumer will have
>to parse text fdinfo so a binary list of pids is not adding any real
>cost.
yeah, I don't really understand why you'd want the binary number that
you'd then have to turn into a string to open the /proc/<pid>/. To me it
sounds more like we want a text output and that output to be:
<pid>/fdinfo/<fd>
So gputop could just read this file to know where the info is.
Too bad we can't symlink cross fs, otherwise we could just add symlinks
to e.g. /sys/class/drm/card<N>/clients/*, which then nicely separate it
per gpu too.
But see below.
>
>>I did some research into sysfs binary attributes, and while some sources mention that
>>it's often used for dumping or loading of driver FW, none of them claim it cannot
>>be used for other purposes.
>>
>>>>>---
>>>>> drivers/gpu/drm/drm_internal.h | 2 +-
>>>>> drivers/gpu/drm/drm_privacy_screen.c | 2 +-
>>>>> drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
>>>>> 3 files changed, 74 insertions(+), 19 deletions(-)
>>>>>
>>>>>diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>>>index 2215baef9a3e..9a399b03d11c 100644
>>>>>--- a/drivers/gpu/drm/drm_internal.h
>>>>>+++ b/drivers/gpu/drm/drm_internal.h
>>>>>@@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
>>>>> void drm_master_internal_release(struct drm_device *dev);
>>>>>
>>>>> /* drm_sysfs.c */
>>>>>-extern struct class *drm_class;
>>>>>+extern struct class drm_class;
>>>>>
>>>>> int drm_sysfs_init(void);
>>>>> void drm_sysfs_destroy(void);
>>>>>diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
>>>>>index 6cc39e30781f..2fbd24ba5818 100644
>>>>>--- a/drivers/gpu/drm/drm_privacy_screen.c
>>>>>+++ b/drivers/gpu/drm/drm_privacy_screen.c
>>>>>@@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
>>>>> mutex_init(&priv->lock);
>>>>> BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
>>>>>
>>>>>- priv->dev.class = drm_class;
>>>>>+ priv->dev.class = &drm_class;
>>>>> priv->dev.type = &drm_privacy_screen_type;
>>>>> priv->dev.parent = parent;
>>>>> priv->dev.release = drm_privacy_screen_device_release;
>>>>>diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
>>>>>index a953f69a34b6..56ca9e22c720 100644
>>>>>--- a/drivers/gpu/drm/drm_sysfs.c
>>>>>+++ b/drivers/gpu/drm/drm_sysfs.c
>>>>>@@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
>>>>> .name = "drm_connector",
>>>>> };
>>>>>
>>>>>-struct class *drm_class;
>>>>>-
>>>>> #ifdef CONFIG_ACPI
>>>>> static bool drm_connector_acpi_bus_match(struct device *dev)
>>>>> {
>>>>>@@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
>>>>>
>>>>> static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
>>>>>
>>>>>+static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
>>>>>+{
>>>>>+ struct drm_minor *minor = cd->driver_data;
>>>>>+ struct drm_device *ddev = minor->dev;
>>>>>+ struct drm_file *priv;
>>>>>+ ssize_t offset = 0;
>>>>>+ void *pid_buf;
>>>>>+
>>>>>+ if (minor->type != DRM_MINOR_RENDER)
>>>>>+ return 0;
>>>
>>>Why this?
>>
>>I return nothing in case of a non-render node because we don't want display drivers
>>to confuse UM GPU profilers.
>
>Feels to arbitrary to me. Let them handle it.
>
>>>>>+
>>>>>+ pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
>>>
>>>I don't quite get the kvmalloc for just one page (or why even a temporay
>>>buffer and not write into buf directly?).
>>
>>Should've used kmalloc, you're right. Or else I could just write everything straight into 'buf'.
>>
>>>>>+ if (!pid_buf)
>>>>>+ return 0;
>>>>>+
>>>>>+ mutex_lock(&ddev->filelist_mutex);
>>>>>+ list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
>>>>>+ struct pid *pid;
>>>>>+
>>>>>+ if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
>>>>>+ break;
>>>
>>>Feels bad.. I would suggest exploring implementing a read callback (instead of
>>>show) and handling arbitrary size output.
>>
>>I think regular class attributes can only implement show() and set(). For a more complex
>>interface, I would have to turn it into an actual binary attribute, and that would be the only
>>choice if we want the list of clients to be of arbitrary size.
>
>Yeah, i915 uses that to dump the error capture file which can be huge
>and is text so it is doable.
>
>>>>>+
>>>>>+ rcu_read_lock();
>>>>>+ pid = rcu_dereference(priv->pid);
>>>>>+ (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
>>>>>+ rcu_read_unlock();
>>>>>+
>>>>>+ offset += sizeof(pid_t);
>>>>>+ }
>>>>>+ mutex_unlock(&ddev->filelist_mutex);
>>>>>+
>>>>>+ if (offset < PAGE_SIZE)
>>>>>+ (*(pid_t *)(pid_buf + offset)) = 0;
>>>
>>>Either NULL terminated or PAGE_SIZE/sizeof(pid) entries and not NULL
>>>terminated feels weird. If I got that right.
>>>
>>>For me everything points towards going for text output.
>>
>>Yes, I know it might sound weird, but my reasoning was: either there are PAGE_SIZE/sizeof(pid) entries
>>and the file isn't NULL terminated (which should be picked up by clients as being one page worth
>>of data, the sysfs attribute maximum transfer unit), or else there aren't enough entries to fill
>>a page and after the last one there's a NULL entry.
>>
>>
>>>>>+
>>>>>+ memcpy(buf, pid_buf, offset);
>>>>>+
>>>>>+ kvfree(pid_buf);
>>>>>+
>>>>>+ return offset;
>>>>>+
>>>>>+}
>>>>>+static DEVICE_ATTR_RO(clients);
/proc/<pid>/fdinfo/ is only readable by the owner. if we report what are
the open fds (or even what are the pids with a drm fd), we are
leaking that info. So we should probably make this
DEVICE_ATTR_ADMIN_RO.
Lucas De Marchi
>>>
>>>Shouldn't BIN_ATTR_RO be used for binary files in sysfs?
>>
>>Like I said above, I sort of faked a binary attribute through the regular sysfs attr API,
>>which is most likely a bad idea.
>>
>>>Regards,
>>>
>>>Tvrtko
>>>
>>>P.S. Or maybe it is time for drmfs? Where each client gets a directory and
>>>drivers can populate files. Such as per client logging streams and whatnot.
>>
>>Yes, but maybe this is something we can discuss in depth in an RFC at a later time?
>
>Yes of course, it is just a long standing idea for flexible per client
>stuff.
>
>Regards,
>
>Tvrtko
>
>>
>>>>>+
>>>>>+static struct attribute *drm_device_attrs[] = {
>>>>>+ &dev_attr_clients.attr,
>>>>>+ NULL,
>>>>>+};
>>>>>+ATTRIBUTE_GROUPS(drm_device);
>>>>>+
>>>>>+struct class drm_class = {
>>>>>+ .name = "drm",
>>>>>+ .dev_groups = drm_device_groups,
>>>>>+};
>>>>>+
>>>>>+static bool drm_class_initialised;
>>>>>+
>>>>> /**
>>>>> * drm_sysfs_init - initialize sysfs helpers
>>>>> *
>>>>>@@ -142,18 +196,19 @@ int drm_sysfs_init(void)
>>>>> {
>>>>> int err;
>>>>>
>>>>>- drm_class = class_create("drm");
>>>>>- if (IS_ERR(drm_class))
>>>>>- return PTR_ERR(drm_class);
>>>>>+ err = class_register(&drm_class);
>>>>>+ if (err)
>>>>>+ return err;
>>>>>
>>>>>- err = class_create_file(drm_class, &class_attr_version.attr);
>>>>>+ err = class_create_file(&drm_class, &class_attr_version.attr);
>>>>> if (err) {
>>>>>- class_destroy(drm_class);
>>>>>- drm_class = NULL;
>>>>>+ class_destroy(&drm_class);
>>>>> return err;
>>>>> }
>>>>>
>>>>>- drm_class->devnode = drm_devnode;
>>>>>+ drm_class.devnode = drm_devnode;
>>>>>+
>>>>>+ drm_class_initialised = true;
>>>>>
>>>>> drm_sysfs_acpi_register();
>>>>> return 0;
>>>>>@@ -166,12 +221,12 @@ int drm_sysfs_init(void)
>>>>> */
>>>>> void drm_sysfs_destroy(void)
>>>>> {
>>>>>- if (IS_ERR_OR_NULL(drm_class))
>>>>>+ if (!drm_class_initialised)
>>>>> return;
>>>>> drm_sysfs_acpi_unregister();
>>>>>- class_remove_file(drm_class, &class_attr_version.attr);
>>>>>- class_destroy(drm_class);
>>>>>- drm_class = NULL;
>>>>>+ class_remove_file(&drm_class, &class_attr_version.attr);
>>>>>+ class_destroy(&drm_class);
>>>>>+ drm_class_initialised = false;
>>>>> }
>>>>>
>>>>> static void drm_sysfs_release(struct device *dev)
>>>>>@@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
>>>>> return -ENOMEM;
>>>>>
>>>>> device_initialize(kdev);
>>>>>- kdev->class = drm_class;
>>>>>+ kdev->class = &drm_class;
>>>>> kdev->type = &drm_sysfs_device_connector;
>>>>> kdev->parent = dev->primary->kdev;
>>>>> kdev->groups = connector_dev_groups;
>>>>>@@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>>>>> minor_str = "card%d";
>>>>>
>>>>> kdev->devt = MKDEV(DRM_MAJOR, minor->index);
>>>>>- kdev->class = drm_class;
>>>>>+ kdev->class = &drm_class;
>>>>> kdev->type = &drm_sysfs_device_minor;
>>>>> }
>>>>>
>>>>>@@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>>>>> */
>>>>> int drm_class_device_register(struct device *dev)
>>>>> {
>>>>>- if (!drm_class || IS_ERR(drm_class))
>>>>>+ if (!drm_class_initialised)
>>>>> return -ENOENT;
>>>>>
>>>>>- dev->class = drm_class;
>>>>>+ dev->class = &drm_class;
>>>>> return device_register(dev);
>>>>> }
>>>>> EXPORT_SYMBOL_GPL(drm_class_device_register);
>>>>>
>>>>>base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
>>>>>--
>>>>>2.44.0
>>>>>
>>
>>Adrian Larumbe
On Wed, May 1, 2024 at 9:19 AM Lucas De Marchi <[email protected]> wrote:
>
> On Wed, May 01, 2024 at 04:58:05PM GMT, Tvrtko Ursulin wrote:
> >
> >Hi,
> >
> >On 24/04/2024 15:48, Adrián Larumbe wrote:
> >>Hi Tvrtko,
> >>
> >>On 15.04.2024 13:50, Tvrtko Ursulin wrote:
> >>>
> >>>On 05/04/2024 18:59, Rob Clark wrote:
> >>>>On Wed, Apr 3, 2024 at 11:37 AM Adrián Larumbe
> >>>><[email protected]> wrote:
> >>>>>
> >>>>>Up to this day, all fdinfo-based GPU profilers must traverse the entire
> >>>>>/proc directory structure to find open DRM clients with fdinfo file
> >>>>>descriptors. This is inefficient and time-consuming.
> >>>>>
> >>>>>This patch adds a new device class attribute that will install a sysfs file
> >>>>>per DRM device, which can be queried by profilers to get a list of PIDs for
> >>>>>their open clients. This file isn't human-readable, and it's meant to be
> >>>>>queried only by GPU profilers like gputop and nvtop.
> >>>>>
> >>>>>Cc: Boris Brezillon <[email protected]>
> >>>>>Cc: Tvrtko Ursulin <[email protected]>
> >>>>>Cc: Christopher Healy <[email protected]>
> >>>>>Signed-off-by: Adrián Larumbe <[email protected]>
> >>>>
> >>>>It does seem like a good idea.. idk if there is some precedent to
> >>>>prefer binary vs ascii in sysfs, but having a way to avoid walking
> >>>>_all_ processes is a good idea.
> >>>
> >>>I naturally second that it is a needed feature, but I do not think binary
> >>>format is justified. AFAIR it should be used for things like hw/fw
> >>>standardised tables or firmware images, not when exporting a simple list of
> >>>PIDs. It also precludes easy shell/script access and the benefit of avoiding
> >>>parsing a short list is I suspect completely dwarfed by needing to parse all
> >>>the related fdinfo etc.
> >>
> >>I'd rather keep it as a binary file for the sake of easily parsing the number
> >>list on the client side, in gputop or nvtop. For textual access, there's already
> >>a debugfs file that presents the same information, so I thought it was best not
> >>to duplicate that functionality and restrict sysfs to serving the very specific
> >>use case of UM profilers having to access the DRM client list.
> >>
> >>I should mention I did something controversial here, which is a semantically
> >>binary attribute through the regular attribute interface. I guess if I keep it
> >>as a binary attribute in the end, I should switch over to the binary attribute
> >>API.
> >>
> >>Another reason why I implemented it as a binary file is that we can only send
> >>back at most a whole page. If a PID takes 4 bytes, that's usually 1024 clients
> >>at most, which is probably enough for any UM profiler, but will decrease even
> >>more if we turn it into an ASCII readable file.
> >
> >I'm afraid I still think there is no reason for a binary file, even
> >less so artificially limited to 1024 clients. Any consumer will have
> >to parse text fdinfo so a binary list of pids is not adding any real
> >cost.
>
> yeah, I don't really understand why you'd want the binary number that
> you'd then have to turn into a string to open the /proc/<pid>/. To me it
> sounds more like we want a text output and that output to be:
>
> <pid>/fdinfo/<fd>
>
> So gputop could just read this file to know where the info is.
> Too bad we can't symlink cross fs, otherwise we could just add symlinks
> to e.g. /sys/class/drm/card<N>/clients/*, which then nicely separate it
> per gpu too.
>
> But see below.
>
> >
> >>I did some research into sysfs binary attributes, and while some sources mention that
> >>it's often used for dumping or loading of driver FW, none of them claim it cannot
> >>be used for other purposes.
> >>
> >>>>>---
> >>>>> drivers/gpu/drm/drm_internal.h | 2 +-
> >>>>> drivers/gpu/drm/drm_privacy_screen.c | 2 +-
> >>>>> drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
> >>>>> 3 files changed, 74 insertions(+), 19 deletions(-)
> >>>>>
> >>>>>diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> >>>>>index 2215baef9a3e..9a399b03d11c 100644
> >>>>>--- a/drivers/gpu/drm/drm_internal.h
> >>>>>+++ b/drivers/gpu/drm/drm_internal.h
> >>>>>@@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
> >>>>> void drm_master_internal_release(struct drm_device *dev);
> >>>>>
> >>>>> /* drm_sysfs.c */
> >>>>>-extern struct class *drm_class;
> >>>>>+extern struct class drm_class;
> >>>>>
> >>>>> int drm_sysfs_init(void);
> >>>>> void drm_sysfs_destroy(void);
> >>>>>diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
> >>>>>index 6cc39e30781f..2fbd24ba5818 100644
> >>>>>--- a/drivers/gpu/drm/drm_privacy_screen.c
> >>>>>+++ b/drivers/gpu/drm/drm_privacy_screen.c
> >>>>>@@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
> >>>>> mutex_init(&priv->lock);
> >>>>> BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
> >>>>>
> >>>>>- priv->dev.class = drm_class;
> >>>>>+ priv->dev.class = &drm_class;
> >>>>> priv->dev.type = &drm_privacy_screen_type;
> >>>>> priv->dev.parent = parent;
> >>>>> priv->dev.release = drm_privacy_screen_device_release;
> >>>>>diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfsc
> >>>>>index a953f69a34b6..56ca9e22c720 100644
> >>>>>--- a/drivers/gpu/drm/drm_sysfs.c
> >>>>>+++ b/drivers/gpu/drm/drm_sysfs.c
> >>>>>@@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
> >>>>> .name = "drm_connector",
> >>>>> };
> >>>>>
> >>>>>-struct class *drm_class;
> >>>>>-
> >>>>> #ifdef CONFIG_ACPI
> >>>>> static bool drm_connector_acpi_bus_match(struct device *dev)
> >>>>> {
> >>>>>@@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
> >>>>>
> >>>>> static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
> >>>>>
> >>>>>+static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
> >>>>>+{
> >>>>>+ struct drm_minor *minor = cd->driver_data;
> >>>>>+ struct drm_device *ddev = minor->dev;
> >>>>>+ struct drm_file *priv;
> >>>>>+ ssize_t offset = 0;
> >>>>>+ void *pid_buf;
> >>>>>+
> >>>>>+ if (minor->type != DRM_MINOR_RENDER)
> >>>>>+ return 0;
> >>>
> >>>Why this?
> >>
> >>I return nothing in case of a non-render node because we don't want display drivers
> >>to confuse UM GPU profilers.
> >
> >Feels to arbitrary to me. Let them handle it.
> >
> >>>>>+
> >>>>>+ pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
> >>>
> >>>I don't quite get the kvmalloc for just one page (or why even a temporay
> >>>buffer and not write into buf directly?).
> >>
> >>Should've used kmalloc, you're right. Or else I could just write everything straight into 'buf'.
> >>
> >>>>>+ if (!pid_buf)
> >>>>>+ return 0;
> >>>>>+
> >>>>>+ mutex_lock(&ddev->filelist_mutex);
> >>>>>+ list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
> >>>>>+ struct pid *pid;
> >>>>>+
> >>>>>+ if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
> >>>>>+ break;
> >>>
> >>>Feels bad.. I would suggest exploring implementing a read callback (instead of
> >>>show) and handling arbitrary size output.
> >>
> >>I think regular class attributes can only implement show() and set(). For a more complex
> >>interface, I would have to turn it into an actual binary attribute, and that would be the only
> >>choice if we want the list of clients to be of arbitrary size.
> >
> >Yeah, i915 uses that to dump the error capture file which can be huge
> >and is text so it is doable.
> >
> >>>>>+
> >>>>>+ rcu_read_lock();
> >>>>>+ pid = rcu_dereference(priv->pid);
> >>>>>+ (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
> >>>>>+ rcu_read_unlock();
> >>>>>+
> >>>>>+ offset += sizeof(pid_t);
> >>>>>+ }
> >>>>>+ mutex_unlock(&ddev->filelist_mutex);
> >>>>>+
> >>>>>+ if (offset < PAGE_SIZE)
> >>>>>+ (*(pid_t *)(pid_buf + offset)) = 0;
> >>>
> >>>Either NULL terminated or PAGE_SIZE/sizeof(pid) entries and not NULL
> >>>terminated feels weird. If I got that right.
> >>>
> >>>For me everything points towards going for text output.
> >>
> >>Yes, I know it might sound weird, but my reasoning was: either there are PAGE_SIZE/sizeof(pid) entries
> >>and the file isn't NULL terminated (which should be picked up by clients as being one page worth
> >>of data, the sysfs attribute maximum transfer unit), or else there aren't enough entries to fill
> >>a page and after the last one there's a NULL entry.
> >>
> >>
> >>>>>+
> >>>>>+ memcpy(buf, pid_buf, offset);
> >>>>>+
> >>>>>+ kvfree(pid_buf);
> >>>>>+
> >>>>>+ return offset;
> >>>>>+
> >>>>>+}
> >>>>>+static DEVICE_ATTR_RO(clients);
>
>
> /proc/<pid>/fdinfo/ is only readable by the owner. if we report what are
> the open fds (or even what are the pids with a drm fd), we are
> leaking that info. So we should probably make this
> DEVICE_ATTR_ADMIN_RO.
I think this is an argument for _only_ listing the PID and not full
paths. I have a use-case where I'd prefer it not to be
DEVICE_ATTR_ADMIN_RO
That said, I think I do prefer txt rather than binary. If nothing
else, it makes it easier to deal with for scripting, which is also
useful sometimes.
BR,
-R
> Lucas De Marchi
>
> >>>
> >>>Shouldn't BIN_ATTR_RO be used for binary files in sysfs?
> >>
> >>Like I said above, I sort of faked a binary attribute through the regular sysfs attr API,
> >>which is most likely a bad idea.
> >>
> >>>Regards,
> >>>
> >>>Tvrtko
> >>>
> >>>P.S. Or maybe it is time for drmfs? Where each client gets a directory and
> >>>drivers can populate files. Such as per client logging streams and whatnot.
> >>
> >>Yes, but maybe this is something we can discuss in depth in an RFC at a later time?
> >
> >Yes of course, it is just a long standing idea for flexible per client
> >stuff.
> >
> >Regards,
> >
> >Tvrtko
> >
> >>
> >>>>>+
> >>>>>+static struct attribute *drm_device_attrs[] = {
> >>>>>+ &dev_attr_clients.attr,
> >>>>>+ NULL,
> >>>>>+};
> >>>>>+ATTRIBUTE_GROUPS(drm_device);
> >>>>>+
> >>>>>+struct class drm_class = {
> >>>>>+ .name = "drm",
> >>>>>+ .dev_groups = drm_device_groups,
> >>>>>+};
> >>>>>+
> >>>>>+static bool drm_class_initialised;
> >>>>>+
> >>>>> /**
> >>>>> * drm_sysfs_init - initialize sysfs helpers
> >>>>> *
> >>>>>@@ -142,18 +196,19 @@ int drm_sysfs_init(void)
> >>>>> {
> >>>>> int err;
> >>>>>
> >>>>>- drm_class = class_create("drm");
> >>>>>- if (IS_ERR(drm_class))
> >>>>>- return PTR_ERR(drm_class);
> >>>>>+ err = class_register(&drm_class);
> >>>>>+ if (err)
> >>>>>+ return err;
> >>>>>
> >>>>>- err = class_create_file(drm_class, &class_attr_version.attr);
> >>>>>+ err = class_create_file(&drm_class, &class_attr_version.attr);
> >>>>> if (err) {
> >>>>>- class_destroy(drm_class);
> >>>>>- drm_class = NULL;
> >>>>>+ class_destroy(&drm_class);
> >>>>> return err;
> >>>>> }
> >>>>>
> >>>>>- drm_class->devnode = drm_devnode;
> >>>>>+ drm_class.devnode = drm_devnode;
> >>>>>+
> >>>>>+ drm_class_initialised = true;
> >>>>>
> >>>>> drm_sysfs_acpi_register();
> >>>>> return 0;
> >>>>>@@ -166,12 +221,12 @@ int drm_sysfs_init(void)
> >>>>> */
> >>>>> void drm_sysfs_destroy(void)
> >>>>> {
> >>>>>- if (IS_ERR_OR_NULL(drm_class))
> >>>>>+ if (!drm_class_initialised)
> >>>>> return;
> >>>>> drm_sysfs_acpi_unregister();
> >>>>>- class_remove_file(drm_class, &class_attr_version.attr);
> >>>>>- class_destroy(drm_class);
> >>>>>- drm_class = NULL;
> >>>>>+ class_remove_file(&drm_class, &class_attr_version.attr);
> >>>>>+ class_destroy(&drm_class);
> >>>>>+ drm_class_initialised = false;
> >>>>> }
> >>>>>
> >>>>> static void drm_sysfs_release(struct device *dev)
> >>>>>@@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
> >>>>> return -ENOMEM;
> >>>>>
> >>>>> device_initialize(kdev);
> >>>>>- kdev->class = drm_class;
> >>>>>+ kdev->class = &drm_class;
> >>>>> kdev->type = &drm_sysfs_device_connector;
> >>>>> kdev->parent = dev->primary->kdev;
> >>>>> kdev->groups = connector_dev_groups;
> >>>>>@@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> >>>>> minor_str = "card%d";
> >>>>>
> >>>>> kdev->devt = MKDEV(DRM_MAJOR, minor->index);
> >>>>>- kdev->class = drm_class;
> >>>>>+ kdev->class = &drm_class;
> >>>>> kdev->type = &drm_sysfs_device_minor;
> >>>>> }
> >>>>>
> >>>>>@@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> >>>>> */
> >>>>> int drm_class_device_register(struct device *dev)
> >>>>> {
> >>>>>- if (!drm_class || IS_ERR(drm_class))
> >>>>>+ if (!drm_class_initialised)
> >>>>> return -ENOENT;
> >>>>>
> >>>>>- dev->class = drm_class;
> >>>>>+ dev->class = &drm_class;
> >>>>> return device_register(dev);
> >>>>> }
> >>>>> EXPORT_SYMBOL_GPL(drm_class_device_register);
> >>>>>
> >>>>>base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
> >>>>>--
> >>>>>2.44.0
> >>>>>
> >>
> >>Adrian Larumbe
On Wed, Apr 03, 2024 at 07:29:39PM +0100, Adri?n Larumbe wrote:
> Up to this day, all fdinfo-based GPU profilers must traverse the entire
> /proc directory structure to find open DRM clients with fdinfo file
> descriptors. This is inefficient and time-consuming.
>
> This patch adds a new device class attribute that will install a sysfs file
> per DRM device, which can be queried by profilers to get a list of PIDs for
> their open clients. This file isn't human-readable, and it's meant to be
> queried only by GPU profilers like gputop and nvtop.
>
> Cc: Boris Brezillon <[email protected]>
> Cc: Tvrtko Ursulin <[email protected]>
> Cc: Christopher Healy <[email protected]>
> Signed-off-by: Adri?n Larumbe <[email protected]>
Tvrtko pointed me at this on irc and .. uh I think this definitely needs
an ack from Greg KH before we can land it. It's quite far away from what
sysfs uapi usually looks like and also semantically does ...
-Sima
> ---
> drivers/gpu/drm/drm_internal.h | 2 +-
> drivers/gpu/drm/drm_privacy_screen.c | 2 +-
> drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
> 3 files changed, 74 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index 2215baef9a3e..9a399b03d11c 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
> void drm_master_internal_release(struct drm_device *dev);
>
> /* drm_sysfs.c */
> -extern struct class *drm_class;
> +extern struct class drm_class;
>
> int drm_sysfs_init(void);
> void drm_sysfs_destroy(void);
> diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
> index 6cc39e30781f..2fbd24ba5818 100644
> --- a/drivers/gpu/drm/drm_privacy_screen.c
> +++ b/drivers/gpu/drm/drm_privacy_screen.c
> @@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
> mutex_init(&priv->lock);
> BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
>
> - priv->dev.class = drm_class;
> + priv->dev.class = &drm_class;
> priv->dev.type = &drm_privacy_screen_type;
> priv->dev.parent = parent;
> priv->dev.release = drm_privacy_screen_device_release;
> diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
> index a953f69a34b6..56ca9e22c720 100644
> --- a/drivers/gpu/drm/drm_sysfs.c
> +++ b/drivers/gpu/drm/drm_sysfs.c
> @@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
> .name = "drm_connector",
> };
>
> -struct class *drm_class;
> -
> #ifdef CONFIG_ACPI
> static bool drm_connector_acpi_bus_match(struct device *dev)
> {
> @@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
>
> static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
>
> +static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
> +{
> + struct drm_minor *minor = cd->driver_data;
> + struct drm_device *ddev = minor->dev;
> + struct drm_file *priv;
> + ssize_t offset = 0;
> + void *pid_buf;
> +
> + if (minor->type != DRM_MINOR_RENDER)
> + return 0;
> +
> + pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
> + if (!pid_buf)
> + return 0;
> +
> + mutex_lock(&ddev->filelist_mutex);
> + list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
> + struct pid *pid;
> +
> + if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
> + break;
> +
> + rcu_read_lock();
> + pid = rcu_dereference(priv->pid);
> + (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
> + rcu_read_unlock();
> +
> + offset += sizeof(pid_t);
> + }
> + mutex_unlock(&ddev->filelist_mutex);
> +
> + if (offset < PAGE_SIZE)
> + (*(pid_t *)(pid_buf + offset)) = 0;
> +
> + memcpy(buf, pid_buf, offset);
> +
> + kvfree(pid_buf);
> +
> + return offset;
> +
> +}
> +static DEVICE_ATTR_RO(clients);
> +
> +static struct attribute *drm_device_attrs[] = {
> + &dev_attr_clients.attr,
> + NULL,
> +};
> +ATTRIBUTE_GROUPS(drm_device);
> +
> +struct class drm_class = {
> + .name = "drm",
> + .dev_groups = drm_device_groups,
> +};
> +
> +static bool drm_class_initialised;
> +
> /**
> * drm_sysfs_init - initialize sysfs helpers
> *
> @@ -142,18 +196,19 @@ int drm_sysfs_init(void)
> {
> int err;
>
> - drm_class = class_create("drm");
> - if (IS_ERR(drm_class))
> - return PTR_ERR(drm_class);
> + err = class_register(&drm_class);
> + if (err)
> + return err;
>
> - err = class_create_file(drm_class, &class_attr_version.attr);
> + err = class_create_file(&drm_class, &class_attr_version.attr);
> if (err) {
> - class_destroy(drm_class);
> - drm_class = NULL;
> + class_destroy(&drm_class);
> return err;
> }
>
> - drm_class->devnode = drm_devnode;
> + drm_class.devnode = drm_devnode;
> +
> + drm_class_initialised = true;
>
> drm_sysfs_acpi_register();
> return 0;
> @@ -166,12 +221,12 @@ int drm_sysfs_init(void)
> */
> void drm_sysfs_destroy(void)
> {
> - if (IS_ERR_OR_NULL(drm_class))
> + if (!drm_class_initialised)
> return;
> drm_sysfs_acpi_unregister();
> - class_remove_file(drm_class, &class_attr_version.attr);
> - class_destroy(drm_class);
> - drm_class = NULL;
> + class_remove_file(&drm_class, &class_attr_version.attr);
> + class_destroy(&drm_class);
> + drm_class_initialised = false;
> }
>
> static void drm_sysfs_release(struct device *dev)
> @@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
> return -ENOMEM;
>
> device_initialize(kdev);
> - kdev->class = drm_class;
> + kdev->class = &drm_class;
> kdev->type = &drm_sysfs_device_connector;
> kdev->parent = dev->primary->kdev;
> kdev->groups = connector_dev_groups;
> @@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> minor_str = "card%d";
>
> kdev->devt = MKDEV(DRM_MAJOR, minor->index);
> - kdev->class = drm_class;
> + kdev->class = &drm_class;
> kdev->type = &drm_sysfs_device_minor;
> }
>
> @@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
> */
> int drm_class_device_register(struct device *dev)
> {
> - if (!drm_class || IS_ERR(drm_class))
> + if (!drm_class_initialised)
> return -ENOENT;
>
> - dev->class = drm_class;
> + dev->class = &drm_class;
> return device_register(dev);
> }
> EXPORT_SYMBOL_GPL(drm_class_device_register);
>
> base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
> --
> 2.44.0
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
On Wed, May 01, 2024 at 09:25:01AM GMT, Rob Clark wrote:
>On Wed, May 1, 2024 at 9:19 AM Lucas De Marchi <[email protected]> wrote:
>>
>> On Wed, May 01, 2024 at 04:58:05PM GMT, Tvrtko Ursulin wrote:
>> >
>> >Hi,
>> >
>> >On 24/04/2024 15:48, Adrián Larumbe wrote:
>> >>Hi Tvrtko,
>> >>
>> >>On 15.04.2024 13:50, Tvrtko Ursulin wrote:
>> >>>
>> >>>On 05/04/2024 18:59, Rob Clark wrote:
>> >>>>On Wed, Apr 3, 2024 at 11:37 AM Adrián Larumbe
>> >>>><[email protected]> wrote:
>> >>>>>
>> >>>>>Up to this day, all fdinfo-based GPU profilers must traverse the entire
>> >>>>>/proc directory structure to find open DRM clients with fdinfo file
>> >>>>>descriptors. This is inefficient and time-consuming.
>> >>>>>
>> >>>>>This patch adds a new device class attribute that will install a sysfs file
>> >>>>>per DRM device, which can be queried by profilers to get a list of PIDs for
>> >>>>>their open clients. This file isn't human-readable, and it's meant to be
>> >>>>>queried only by GPU profilers like gputop and nvtop.
>> >>>>>
>> >>>>>Cc: Boris Brezillon <[email protected]>
>> >>>>>Cc: Tvrtko Ursulin <[email protected]>
>> >>>>>Cc: Christopher Healy <[email protected]>
>> >>>>>Signed-off-by: Adrián Larumbe <[email protected]>
>> >>>>
>> >>>>It does seem like a good idea.. idk if there is some precedent to
>> >>>>prefer binary vs ascii in sysfs, but having a way to avoid walking
>> >>>>_all_ processes is a good idea.
>> >>>
>> >>>I naturally second that it is a needed feature, but I do not think binary
>> >>>format is justified. AFAIR it should be used for things like hw/fw
>> >>>standardised tables or firmware images, not when exporting a simple list of
>> >>>PIDs. It also precludes easy shell/script access and the benefit of avoiding
>> >>>parsing a short list is I suspect completely dwarfed by needing to parse all
>> >>>the related fdinfo etc.
>> >>
>> >>I'd rather keep it as a binary file for the sake of easily parsing the number
>> >>list on the client side, in gputop or nvtop. For textual access, there's already
>> >>a debugfs file that presents the same information, so I thought it was best not
>> >>to duplicate that functionality and restrict sysfs to serving the very specific
>> >>use case of UM profilers having to access the DRM client list.
>> >>
>> >>I should mention I did something controversial here, which is a semantically
>> >>binary attribute through the regular attribute interface. I guess if I keep it
>> >>as a binary attribute in the end, I should switch over to the binary attribute
>> >>API.
>> >>
>> >>Another reason why I implemented it as a binary file is that we can only send
>> >>back at most a whole page. If a PID takes 4 bytes, that's usually 1024 clients
>> >>at most, which is probably enough for any UM profiler, but will decrease even
>> >>more if we turn it into an ASCII readable file.
>> >
>> >I'm afraid I still think there is no reason for a binary file, even
>> >less so artificially limited to 1024 clients. Any consumer will have
>> >to parse text fdinfo so a binary list of pids is not adding any real
>> >cost.
>>
>> yeah, I don't really understand why you'd want the binary number that
>> you'd then have to turn into a string to open the /proc/<pid>/. To me it
>> sounds more like we want a text output and that output to be:
>>
>> <pid>/fdinfo/<fd>
>>
>> So gputop could just read this file to know where the info is.
>> Too bad we can't symlink cross fs, otherwise we could just add symlinks
>> to e.g. /sys/class/drm/card<N>/clients/*, which then nicely separate it
>> per gpu too.
>>
>> But see below.
>>
>> >
>> >>I did some research into sysfs binary attributes, and while some sources mention that
>> >>it's often used for dumping or loading of driver FW, none of them claim it cannot
>> >>be used for other purposes.
>> >>
>> >>>>>---
>> >>>>> drivers/gpu/drm/drm_internal.h | 2 +-
>> >>>>> drivers/gpu/drm/drm_privacy_screen.c | 2 +-
>> >>>>> drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
>> >>>>> 3 files changed, 74 insertions(+), 19 deletions(-)
>> >>>>>
>> >>>>>diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>> >>>>>index 2215baef9a3e..9a399b03d11c 100644
>> >>>>>--- a/drivers/gpu/drm/drm_internal.h
>> >>>>>+++ b/drivers/gpu/drm/drm_internal.h
>> >>>>>@@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
>> >>>>> void drm_master_internal_release(struct drm_device *dev);
>> >>>>>
>> >>>>> /* drm_sysfs.c */
>> >>>>>-extern struct class *drm_class;
>> >>>>>+extern struct class drm_class;
>> >>>>>
>> >>>>> int drm_sysfs_init(void);
>> >>>>> void drm_sysfs_destroy(void);
>> >>>>>diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
>> >>>>>index 6cc39e30781f..2fbd24ba5818 100644
>> >>>>>--- a/drivers/gpu/drm/drm_privacy_screen.c
>> >>>>>+++ b/drivers/gpu/drm/drm_privacy_screen.c
>> >>>>>@@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
>> >>>>> mutex_init(&priv->lock);
>> >>>>> BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
>> >>>>>
>> >>>>>- priv->dev.class = drm_class;
>> >>>>>+ priv->dev.class = &drm_class;
>> >>>>> priv->dev.type = &drm_privacy_screen_type;
>> >>>>> priv->dev.parent = parent;
>> >>>>> priv->dev.release = drm_privacy_screen_device_release;
>> >>>>>diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
>> >>>>>index a953f69a34b6..56ca9e22c720 100644
>> >>>>>--- a/drivers/gpu/drm/drm_sysfs.c
>> >>>>>+++ b/drivers/gpu/drm/drm_sysfs.c
>> >>>>>@@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
>> >>>>> .name = "drm_connector",
>> >>>>> };
>> >>>>>
>> >>>>>-struct class *drm_class;
>> >>>>>-
>> >>>>> #ifdef CONFIG_ACPI
>> >>>>> static bool drm_connector_acpi_bus_match(struct device *dev)
>> >>>>> {
>> >>>>>@@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
>> >>>>>
>> >>>>> static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
>> >>>>>
>> >>>>>+static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
>> >>>>>+{
>> >>>>>+ struct drm_minor *minor = cd->driver_data;
>> >>>>>+ struct drm_device *ddev = minor->dev;
>> >>>>>+ struct drm_file *priv;
>> >>>>>+ ssize_t offset = 0;
>> >>>>>+ void *pid_buf;
>> >>>>>+
>> >>>>>+ if (minor->type != DRM_MINOR_RENDER)
>> >>>>>+ return 0;
>> >>>
>> >>>Why this?
>> >>
>> >>I return nothing in case of a non-render node because we don't want display drivers
>> >>to confuse UM GPU profilers.
>> >
>> >Feels to arbitrary to me. Let them handle it.
>> >
>> >>>>>+
>> >>>>>+ pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
>> >>>
>> >>>I don't quite get the kvmalloc for just one page (or why even a temporay
>> >>>buffer and not write into buf directly?).
>> >>
>> >>Should've used kmalloc, you're right. Or else I could just write everything straight into 'buf'.
>> >>
>> >>>>>+ if (!pid_buf)
>> >>>>>+ return 0;
>> >>>>>+
>> >>>>>+ mutex_lock(&ddev->filelist_mutex);
>> >>>>>+ list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
>> >>>>>+ struct pid *pid;
>> >>>>>+
>> >>>>>+ if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
>> >>>>>+ break;
>> >>>
>> >>>Feels bad.. I would suggest exploring implementing a read callback (instead of
>> >>>show) and handling arbitrary size output.
>> >>
>> >>I think regular class attributes can only implement show() and set(). For a more complex
>> >>interface, I would have to turn it into an actual binary attribute, and that would be the only
>> >>choice if we want the list of clients to be of arbitrary size.
>> >
>> >Yeah, i915 uses that to dump the error capture file which can be huge
>> >and is text so it is doable.
>> >
>> >>>>>+
>> >>>>>+ rcu_read_lock();
>> >>>>>+ pid = rcu_dereference(priv->pid);
>> >>>>>+ (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
>> >>>>>+ rcu_read_unlock();
>> >>>>>+
>> >>>>>+ offset += sizeof(pid_t);
>> >>>>>+ }
>> >>>>>+ mutex_unlock(&ddev->filelist_mutex);
>> >>>>>+
>> >>>>>+ if (offset < PAGE_SIZE)
>> >>>>>+ (*(pid_t *)(pid_buf + offset)) = 0;
>> >>>
>> >>>Either NULL terminated or PAGE_SIZE/sizeof(pid) entries and not NULL
>> >>>terminated feels weird. If I got that right.
>> >>>
>> >>>For me everything points towards going for text output.
>> >>
>> >>Yes, I know it might sound weird, but my reasoning was: either there are PAGE_SIZE/sizeof(pid) entries
>> >>and the file isn't NULL terminated (which should be picked up by clients as being one page worth
>> >>of data, the sysfs attribute maximum transfer unit), or else there aren't enough entries to fill
>> >>a page and after the last one there's a NULL entry.
>> >>
>> >>
>> >>>>>+
>> >>>>>+ memcpy(buf, pid_buf, offset);
>> >>>>>+
>> >>>>>+ kvfree(pid_buf);
>> >>>>>+
>> >>>>>+ return offset;
>> >>>>>+
>> >>>>>+}
>> >>>>>+static DEVICE_ATTR_RO(clients);
>>
>>
>> /proc/<pid>/fdinfo/ is only readable by the owner. if we report what are
>> the open fds (or even what are the pids with a drm fd), we are
>> leaking that info. So we should probably make this
>> DEVICE_ATTR_ADMIN_RO.
>
>I think this is an argument for _only_ listing the PID and not full
>paths. I have a use-case where I'd prefer it not to be
>DEVICE_ATTR_ADMIN_RO
maybe... today you still can't get the pids that have a file open, even if you
don't get the exact fd number. And it must have some good reasons because the
dir is S_IRUGO|S_IXUGO, yet kernel refuses to let me see what is inside.
This is interesting and possibly relevant to the discussion:
7bc3fa0172a4 ("procfs: allow reading fdinfo with PTRACE_MODE_READ")
>
>That said, I think I do prefer txt rather than binary. If nothing
>else, it makes it easier to deal with for scripting, which is also
>useful sometimes.
agreed. Even if it's only pid, just make it text and the app will use it
as is to open the fdinfo path.
Lucas De Marchi
>
>BR,
>-R
>
>> Lucas De Marchi
>>
>> >>>
>> >>>Shouldn't BIN_ATTR_RO be used for binary files in sysfs?
>> >>
>> >>Like I said above, I sort of faked a binary attribute through the regular sysfs attr API,
>> >>which is most likely a bad idea.
>> >>
>> >>>Regards,
>> >>>
>> >>>Tvrtko
>> >>>
>> >>>P.S. Or maybe it is time for drmfs? Where each client gets a directory and
>> >>>drivers can populate files. Such as per client logging streams and whatnot.
>> >>
>> >>Yes, but maybe this is something we can discuss in depth in an RFC at a later time?
>> >
>> >Yes of course, it is just a long standing idea for flexible per client
>> >stuff.
>> >
>> >Regards,
>> >
>> >Tvrtko
>> >
>> >>
>> >>>>>+
>> >>>>>+static struct attribute *drm_device_attrs[] = {
>> >>>>>+ &dev_attr_clients.attr,
>> >>>>>+ NULL,
>> >>>>>+};
>> >>>>>+ATTRIBUTE_GROUPS(drm_device);
>> >>>>>+
>> >>>>>+struct class drm_class = {
>> >>>>>+ .name = "drm",
>> >>>>>+ .dev_groups = drm_device_groups,
>> >>>>>+};
>> >>>>>+
>> >>>>>+static bool drm_class_initialised;
>> >>>>>+
>> >>>>> /**
>> >>>>> * drm_sysfs_init - initialize sysfs helpers
>> >>>>> *
>> >>>>>@@ -142,18 +196,19 @@ int drm_sysfs_init(void)
>> >>>>> {
>> >>>>> int err;
>> >>>>>
>> >>>>>- drm_class = class_create("drm");
>> >>>>>- if (IS_ERR(drm_class))
>> >>>>>- return PTR_ERR(drm_class);
>> >>>>>+ err = class_register(&drm_class);
>> >>>>>+ if (err)
>> >>>>>+ return err;
>> >>>>>
>> >>>>>- err = class_create_file(drm_class, &class_attr_version.attr);
>> >>>>>+ err = class_create_file(&drm_class, &class_attr_version.attr);
>> >>>>> if (err) {
>> >>>>>- class_destroy(drm_class);
>> >>>>>- drm_class = NULL;
>> >>>>>+ class_destroy(&drm_class);
>> >>>>> return err;
>> >>>>> }
>> >>>>>
>> >>>>>- drm_class->devnode = drm_devnode;
>> >>>>>+ drm_class.devnode = drm_devnode;
>> >>>>>+
>> >>>>>+ drm_class_initialised = true;
>> >>>>>
>> >>>>> drm_sysfs_acpi_register();
>> >>>>> return 0;
>> >>>>>@@ -166,12 +221,12 @@ int drm_sysfs_init(void)
>> >>>>> */
>> >>>>> void drm_sysfs_destroy(void)
>> >>>>> {
>> >>>>>- if (IS_ERR_OR_NULL(drm_class))
>> >>>>>+ if (!drm_class_initialised)
>> >>>>> return;
>> >>>>> drm_sysfs_acpi_unregister();
>> >>>>>- class_remove_file(drm_class, &class_attr_version.attr);
>> >>>>>- class_destroy(drm_class);
>> >>>>>- drm_class = NULL;
>> >>>>>+ class_remove_file(&drm_class, &class_attr_version.attr);
>> >>>>>+ class_destroy(&drm_class);
>> >>>>>+ drm_class_initialised = false;
>> >>>>> }
>> >>>>>
>> >>>>> static void drm_sysfs_release(struct device *dev)
>> >>>>>@@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
>> >>>>> return -ENOMEM;
>> >>>>>
>> >>>>> device_initialize(kdev);
>> >>>>>- kdev->class = drm_class;
>> >>>>>+ kdev->class = &drm_class;
>> >>>>> kdev->type = &drm_sysfs_device_connector;
>> >>>>> kdev->parent = dev->primary->kdev;
>> >>>>> kdev->groups = connector_dev_groups;
>> >>>>>@@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>> >>>>> minor_str = "card%d";
>> >>>>>
>> >>>>> kdev->devt = MKDEV(DRM_MAJOR, minor->index);
>> >>>>>- kdev->class = drm_class;
>> >>>>>+ kdev->class = &drm_class;
>> >>>>> kdev->type = &drm_sysfs_device_minor;
>> >>>>> }
>> >>>>>
>> >>>>>@@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>> >>>>> */
>> >>>>> int drm_class_device_register(struct device *dev)
>> >>>>> {
>> >>>>>- if (!drm_class || IS_ERR(drm_class))
>> >>>>>+ if (!drm_class_initialised)
>> >>>>> return -ENOENT;
>> >>>>>
>> >>>>>- dev->class = drm_class;
>> >>>>>+ dev->class = &drm_class;
>> >>>>> return device_register(dev);
>> >>>>> }
>> >>>>> EXPORT_SYMBOL_GPL(drm_class_device_register);
>> >>>>>
>> >>>>>base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
>> >>>>>--
>> >>>>>2.44.0
>> >>>>>
>> >>
>> >>Adrian Larumbe
Hi,
On 24/04/2024 15:48, Adrián Larumbe wrote:
> Hi Tvrtko,
>
> On 15.04.2024 13:50, Tvrtko Ursulin wrote:
>>
>> On 05/04/2024 18:59, Rob Clark wrote:
>>> On Wed, Apr 3, 2024 at 11:37 AM Adrián Larumbe
>>> <[email protected]> wrote:
>>>>
>>>> Up to this day, all fdinfo-based GPU profilers must traverse the entire
>>>> /proc directory structure to find open DRM clients with fdinfo file
>>>> descriptors. This is inefficient and time-consuming.
>>>>
>>>> This patch adds a new device class attribute that will install a sysfs file
>>>> per DRM device, which can be queried by profilers to get a list of PIDs for
>>>> their open clients. This file isn't human-readable, and it's meant to be
>>>> queried only by GPU profilers like gputop and nvtop.
>>>>
>>>> Cc: Boris Brezillon <[email protected]>
>>>> Cc: Tvrtko Ursulin <[email protected]>
>>>> Cc: Christopher Healy <[email protected]>
>>>> Signed-off-by: Adrián Larumbe <[email protected]>
>>>
>>> It does seem like a good idea.. idk if there is some precedent to
>>> prefer binary vs ascii in sysfs, but having a way to avoid walking
>>> _all_ processes is a good idea.
>>
>> I naturally second that it is a needed feature, but I do not think binary
>> format is justified. AFAIR it should be used for things like hw/fw
>> standardised tables or firmware images, not when exporting a simple list of
>> PIDs. It also precludes easy shell/script access and the benefit of avoiding
>> parsing a short list is I suspect completely dwarfed by needing to parse all
>> the related fdinfo etc.
>
> I'd rather keep it as a binary file for the sake of easily parsing the number
> list on the client side, in gputop or nvtop. For textual access, there's already
> a debugfs file that presents the same information, so I thought it was best not
> to duplicate that functionality and restrict sysfs to serving the very specific
> use case of UM profilers having to access the DRM client list.
>
> I should mention I did something controversial here, which is a semantically
> binary attribute through the regular attribute interface. I guess if I keep it
> as a binary attribute in the end, I should switch over to the binary attribute
> API.
>
> Another reason why I implemented it as a binary file is that we can only send
> back at most a whole page. If a PID takes 4 bytes, that's usually 1024 clients
> at most, which is probably enough for any UM profiler, but will decrease even
> more if we turn it into an ASCII readable file.
I'm afraid I still think there is no reason for a binary file, even less
so artificially limited to 1024 clients. Any consumer will have to parse
text fdinfo so a binary list of pids is not adding any real cost.
> I did some research into sysfs binary attributes, and while some sources mention that
> it's often used for dumping or loading of driver FW, none of them claim it cannot
> be used for other purposes.
>
>>>> ---
>>>> drivers/gpu/drm/drm_internal.h | 2 +-
>>>> drivers/gpu/drm/drm_privacy_screen.c | 2 +-
>>>> drivers/gpu/drm/drm_sysfs.c | 89 ++++++++++++++++++++++------
>>>> 3 files changed, 74 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>> index 2215baef9a3e..9a399b03d11c 100644
>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>> @@ -145,7 +145,7 @@ bool drm_master_internal_acquire(struct drm_device *dev);
>>>> void drm_master_internal_release(struct drm_device *dev);
>>>>
>>>> /* drm_sysfs.c */
>>>> -extern struct class *drm_class;
>>>> +extern struct class drm_class;
>>>>
>>>> int drm_sysfs_init(void);
>>>> void drm_sysfs_destroy(void);
>>>> diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c
>>>> index 6cc39e30781f..2fbd24ba5818 100644
>>>> --- a/drivers/gpu/drm/drm_privacy_screen.c
>>>> +++ b/drivers/gpu/drm/drm_privacy_screen.c
>>>> @@ -401,7 +401,7 @@ struct drm_privacy_screen *drm_privacy_screen_register(
>>>> mutex_init(&priv->lock);
>>>> BLOCKING_INIT_NOTIFIER_HEAD(&priv->notifier_head);
>>>>
>>>> - priv->dev.class = drm_class;
>>>> + priv->dev.class = &drm_class;
>>>> priv->dev.type = &drm_privacy_screen_type;
>>>> priv->dev.parent = parent;
>>>> priv->dev.release = drm_privacy_screen_device_release;
>>>> diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
>>>> index a953f69a34b6..56ca9e22c720 100644
>>>> --- a/drivers/gpu/drm/drm_sysfs.c
>>>> +++ b/drivers/gpu/drm/drm_sysfs.c
>>>> @@ -58,8 +58,6 @@ static struct device_type drm_sysfs_device_connector = {
>>>> .name = "drm_connector",
>>>> };
>>>>
>>>> -struct class *drm_class;
>>>> -
>>>> #ifdef CONFIG_ACPI
>>>> static bool drm_connector_acpi_bus_match(struct device *dev)
>>>> {
>>>> @@ -128,6 +126,62 @@ static const struct component_ops typec_connector_ops = {
>>>>
>>>> static CLASS_ATTR_STRING(version, S_IRUGO, "drm 1.1.0 20060810");
>>>>
>>>> +static ssize_t clients_show(struct device *cd, struct device_attribute *attr, char *buf)
>>>> +{
>>>> + struct drm_minor *minor = cd->driver_data;
>>>> + struct drm_device *ddev = minor->dev;
>>>> + struct drm_file *priv;
>>>> + ssize_t offset = 0;
>>>> + void *pid_buf;
>>>> +
>>>> + if (minor->type != DRM_MINOR_RENDER)
>>>> + return 0;
>>
>> Why this?
>
> I return nothing in case of a non-render node because we don't want display drivers
> to confuse UM GPU profilers.
Feels to arbitrary to me. Let them handle it.
>>>> +
>>>> + pid_buf = kvmalloc(PAGE_SIZE, GFP_KERNEL);
>>
>> I don't quite get the kvmalloc for just one page (or why even a temporay
>> buffer and not write into buf directly?).
>
> Should've used kmalloc, you're right. Or else I could just write everything straight into 'buf'.
>
>>>> + if (!pid_buf)
>>>> + return 0;
>>>> +
>>>> + mutex_lock(&ddev->filelist_mutex);
>>>> + list_for_each_entry_reverse(priv, &ddev->filelist, lhead) {
>>>> + struct pid *pid;
>>>> +
>>>> + if (drm_WARN_ON(ddev, (PAGE_SIZE - offset) < sizeof(pid_t)))
>>>> + break;
>>
>> Feels bad.. I would suggest exploring implementing a read callback (instead of
>> show) and handling arbitrary size output.
>
> I think regular class attributes can only implement show() and set(). For a more complex
> interface, I would have to turn it into an actual binary attribute, and that would be the only
> choice if we want the list of clients to be of arbitrary size.
Yeah, i915 uses that to dump the error capture file which can be huge
and is text so it is doable.
>>>> +
>>>> + rcu_read_lock();
>>>> + pid = rcu_dereference(priv->pid);
>>>> + (*(pid_t *)(pid_buf + offset)) = pid_vnr(pid);
>>>> + rcu_read_unlock();
>>>> +
>>>> + offset += sizeof(pid_t);
>>>> + }
>>>> + mutex_unlock(&ddev->filelist_mutex);
>>>> +
>>>> + if (offset < PAGE_SIZE)
>>>> + (*(pid_t *)(pid_buf + offset)) = 0;
>>
>> Either NULL terminated or PAGE_SIZE/sizeof(pid) entries and not NULL
>> terminated feels weird. If I got that right.
>>
>> For me everything points towards going for text output.
>
> Yes, I know it might sound weird, but my reasoning was: either there are PAGE_SIZE/sizeof(pid) entries
> and the file isn't NULL terminated (which should be picked up by clients as being one page worth
> of data, the sysfs attribute maximum transfer unit), or else there aren't enough entries to fill
> a page and after the last one there's a NULL entry.
>
>
>>>> +
>>>> + memcpy(buf, pid_buf, offset);
>>>> +
>>>> + kvfree(pid_buf);
>>>> +
>>>> + return offset;
>>>> +
>>>> +}
>>>> +static DEVICE_ATTR_RO(clients);
>>
>> Shouldn't BIN_ATTR_RO be used for binary files in sysfs?
>
> Like I said above, I sort of faked a binary attribute through the regular sysfs attr API,
> which is most likely a bad idea.
>
>> Regards,
>>
>> Tvrtko
>>
>> P.S. Or maybe it is time for drmfs? Where each client gets a directory and
>> drivers can populate files. Such as per client logging streams and whatnot.
>
> Yes, but maybe this is something we can discuss in depth in an RFC at a later time?
Yes of course, it is just a long standing idea for flexible per client
stuff.
Regards,
Tvrtko
>
>>>> +
>>>> +static struct attribute *drm_device_attrs[] = {
>>>> + &dev_attr_clients.attr,
>>>> + NULL,
>>>> +};
>>>> +ATTRIBUTE_GROUPS(drm_device);
>>>> +
>>>> +struct class drm_class = {
>>>> + .name = "drm",
>>>> + .dev_groups = drm_device_groups,
>>>> +};
>>>> +
>>>> +static bool drm_class_initialised;
>>>> +
>>>> /**
>>>> * drm_sysfs_init - initialize sysfs helpers
>>>> *
>>>> @@ -142,18 +196,19 @@ int drm_sysfs_init(void)
>>>> {
>>>> int err;
>>>>
>>>> - drm_class = class_create("drm");
>>>> - if (IS_ERR(drm_class))
>>>> - return PTR_ERR(drm_class);
>>>> + err = class_register(&drm_class);
>>>> + if (err)
>>>> + return err;
>>>>
>>>> - err = class_create_file(drm_class, &class_attr_version.attr);
>>>> + err = class_create_file(&drm_class, &class_attr_version.attr);
>>>> if (err) {
>>>> - class_destroy(drm_class);
>>>> - drm_class = NULL;
>>>> + class_destroy(&drm_class);
>>>> return err;
>>>> }
>>>>
>>>> - drm_class->devnode = drm_devnode;
>>>> + drm_class.devnode = drm_devnode;
>>>> +
>>>> + drm_class_initialised = true;
>>>>
>>>> drm_sysfs_acpi_register();
>>>> return 0;
>>>> @@ -166,12 +221,12 @@ int drm_sysfs_init(void)
>>>> */
>>>> void drm_sysfs_destroy(void)
>>>> {
>>>> - if (IS_ERR_OR_NULL(drm_class))
>>>> + if (!drm_class_initialised)
>>>> return;
>>>> drm_sysfs_acpi_unregister();
>>>> - class_remove_file(drm_class, &class_attr_version.attr);
>>>> - class_destroy(drm_class);
>>>> - drm_class = NULL;
>>>> + class_remove_file(&drm_class, &class_attr_version.attr);
>>>> + class_destroy(&drm_class);
>>>> + drm_class_initialised = false;
>>>> }
>>>>
>>>> static void drm_sysfs_release(struct device *dev)
>>>> @@ -372,7 +427,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
>>>> return -ENOMEM;
>>>>
>>>> device_initialize(kdev);
>>>> - kdev->class = drm_class;
>>>> + kdev->class = &drm_class;
>>>> kdev->type = &drm_sysfs_device_connector;
>>>> kdev->parent = dev->primary->kdev;
>>>> kdev->groups = connector_dev_groups;
>>>> @@ -550,7 +605,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>>>> minor_str = "card%d";
>>>>
>>>> kdev->devt = MKDEV(DRM_MAJOR, minor->index);
>>>> - kdev->class = drm_class;
>>>> + kdev->class = &drm_class;
>>>> kdev->type = &drm_sysfs_device_minor;
>>>> }
>>>>
>>>> @@ -579,10 +634,10 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
>>>> */
>>>> int drm_class_device_register(struct device *dev)
>>>> {
>>>> - if (!drm_class || IS_ERR(drm_class))
>>>> + if (!drm_class_initialised)
>>>> return -ENOENT;
>>>>
>>>> - dev->class = drm_class;
>>>> + dev->class = &drm_class;
>>>> return device_register(dev);
>>>> }
>>>> EXPORT_SYMBOL_GPL(drm_class_device_register);
>>>>
>>>> base-commit: 45c734fdd43db14444025910b4c59dd2b8be714a
>>>> --
>>>> 2.44.0
>>>>
>
> Adrian Larumbe