2022-09-06 20:37:23

by Michał Winiarski

[permalink] [raw]
Subject: [PATCH v4 0/3] drm: Use full allocated minor range for DRM

64 DRM device nodes is not enough for everyone.
Upgrade it to ~512K (which definitely is more than enough).

To allow testing userspace support for >64 devices, add additional DRM
modparam (skip_legacy_minors) which causes DRM to skip allocating minors
in 0-192 range.
Additionally - convert minors to use XArray instead of IDR to simplify the
locking.

v1 -> v2:
Don't touch DRM_MINOR_CONTROL and its range (Simon Ser)

v2 -> v3:
Don't use legacy scheme for >=192 minor range (Dave Airlie)
Add modparam for testing (Dave Airlie)
Add lockdep annotation for IDR (Daniel Vetter)

v3 -> v4:
Convert from IDR to XArray (Matthew Wilcox)

Michał Winiarski (3):
drm: Use XArray instead of IDR for minors
drm: Expand max DRM device number to full MINORBITS
drm: Introduce skip_legacy_minors modparam

drivers/gpu/drm/drm_drv.c | 66 +++++++++++++++++++--------------------
1 file changed, 33 insertions(+), 33 deletions(-)

--
2.37.3


2022-09-06 20:37:22

by Michał Winiarski

[permalink] [raw]
Subject: [PATCH v4 1/3] drm: Use XArray instead of IDR for minors

IDR is deprecated, and since XArray manages its own state with internal
locking, it simplifies the locking on DRM side.
Additionally, don't use the IRQ-safe variant, since operating on drm
minor is not done in IRQ context.

Signed-off-by: Michał Winiarski <[email protected]>
Suggested-by: Matthew Wilcox <[email protected]>
---
drivers/gpu/drm/drm_drv.c | 49 ++++++++++++++-------------------------
1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 8214a0b1ab7f..41799e4d0432 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -34,6 +34,7 @@
#include <linux/pseudo_fs.h>
#include <linux/slab.h>
#include <linux/srcu.h>
+#include <linux/xarray.h>

#include <drm/drm_cache.h>
#include <drm/drm_client.h>
@@ -53,8 +54,7 @@ MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl");
MODULE_DESCRIPTION("DRM shared core routines");
MODULE_LICENSE("GPL and additional rights");

-static DEFINE_SPINLOCK(drm_minor_lock);
-static struct idr drm_minors_idr;
+static DEFINE_XARRAY_ALLOC(drm_minors_xa);

/*
* If the drm core fails to init for whatever reason,
@@ -98,21 +98,18 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
static void drm_minor_alloc_release(struct drm_device *dev, void *data)
{
struct drm_minor *minor = data;
- unsigned long flags;

WARN_ON(dev != minor->dev);

put_device(minor->kdev);

- spin_lock_irqsave(&drm_minor_lock, flags);
- idr_remove(&drm_minors_idr, minor->index);
- spin_unlock_irqrestore(&drm_minor_lock, flags);
+ xa_release(&drm_minors_xa, minor->index);
}

static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
{
struct drm_minor *minor;
- unsigned long flags;
+ u32 id;
int r;

minor = drmm_kzalloc(dev, sizeof(*minor), GFP_KERNEL);
@@ -122,20 +119,12 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
minor->type = type;
minor->dev = dev;

- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(&drm_minor_lock, flags);
- r = idr_alloc(&drm_minors_idr,
- NULL,
- 64 * type,
- 64 * (type + 1),
- GFP_NOWAIT);
- spin_unlock_irqrestore(&drm_minor_lock, flags);
- idr_preload_end();
-
+ r = xa_alloc(&drm_minors_xa, &id, NULL,
+ XA_LIMIT(64 * type, 64 * (type + 1) - 1), GFP_KERNEL);
if (r < 0)
return r;

- minor->index = r;
+ minor->index = id;

r = drmm_add_action_or_reset(dev, drm_minor_alloc_release, minor);
if (r)
@@ -152,7 +141,7 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
static int drm_minor_register(struct drm_device *dev, unsigned int type)
{
struct drm_minor *minor;
- unsigned long flags;
+ void *entry;
int ret;

DRM_DEBUG("\n");
@@ -172,9 +161,12 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
goto err_debugfs;

/* replace NULL with @minor so lookups will succeed from now on */
- spin_lock_irqsave(&drm_minor_lock, flags);
- idr_replace(&drm_minors_idr, minor, minor->index);
- spin_unlock_irqrestore(&drm_minor_lock, flags);
+ entry = xa_store(&drm_minors_xa, minor->index, &minor, GFP_KERNEL);
+ if (xa_is_err(entry)) {
+ ret = xa_err(entry);
+ goto err_debugfs;
+ }
+ WARN_ON(entry);

DRM_DEBUG("new minor registered %d\n", minor->index);
return 0;
@@ -187,16 +179,13 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
{
struct drm_minor *minor;
- unsigned long flags;

minor = *drm_minor_get_slot(dev, type);
if (!minor || !device_is_registered(minor->kdev))
return;

/* replace @minor with NULL so lookups will fail from now on */
- spin_lock_irqsave(&drm_minor_lock, flags);
- idr_replace(&drm_minors_idr, NULL, minor->index);
- spin_unlock_irqrestore(&drm_minor_lock, flags);
+ xa_erase(&drm_minors_xa, minor->index);

device_del(minor->kdev);
dev_set_drvdata(minor->kdev, NULL); /* safety belt */
@@ -215,13 +204,10 @@ static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
struct drm_minor *drm_minor_acquire(unsigned int minor_id)
{
struct drm_minor *minor;
- unsigned long flags;

- spin_lock_irqsave(&drm_minor_lock, flags);
- minor = idr_find(&drm_minors_idr, minor_id);
+ minor = xa_load(&drm_minors_xa, minor_id);
if (minor)
drm_dev_get(minor->dev);
- spin_unlock_irqrestore(&drm_minor_lock, flags);

if (!minor) {
return ERR_PTR(-ENODEV);
@@ -1037,7 +1023,7 @@ static void drm_core_exit(void)
unregister_chrdev(DRM_MAJOR, "drm");
debugfs_remove(drm_debugfs_root);
drm_sysfs_destroy();
- idr_destroy(&drm_minors_idr);
+ xa_destroy(&drm_minors_xa);
drm_connector_ida_destroy();
}

@@ -1046,7 +1032,6 @@ static int __init drm_core_init(void)
int ret;

drm_connector_ida_init();
- idr_init(&drm_minors_idr);
drm_memcpy_init_early();

ret = drm_sysfs_init();
--
2.37.3

2022-09-06 20:59:34

by Michał Winiarski

[permalink] [raw]
Subject: [PATCH v4 2/3] drm: Expand max DRM device number to full MINORBITS

Having a limit of 64 DRM devices is not good enough for modern world
where we have multi-GPU servers, SR-IOV virtual functions and virtual
devices used for testing.
Let's utilize full minor range for DRM devices.
To avoid regressing the existing userspace, we're still maintaining the
numbering scheme where 0-63 is used for primary, 64-127 is reserved
(formerly for control) and 128-191 is used for render.
For minors >= 192, we're allocating minors dynamically on a first-come,
first-served basis.

Signed-off-by: Michał Winiarski <[email protected]>
---
drivers/gpu/drm/drm_drv.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 41799e4d0432..2c6e0b8d3b7a 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -119,8 +119,17 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
minor->type = type;
minor->dev = dev;

+ /*
+ * DRM used to support 64 devices, for backwards compatibility we need to maintain the
+ * minor allocation scheme where minors 0-63 are primary nodes, 64-127 are control nodes,
+ * and 128-191 are render nodes.
+ * After reaching the limit, we're allocating minors dynamically - first-come, first-serve.
+ */
r = xa_alloc(&drm_minors_xa, &id, NULL,
XA_LIMIT(64 * type, 64 * (type + 1) - 1), GFP_KERNEL);
+ if (r == -EBUSY)
+ r = xa_alloc(&drm_minors_xa, &id, NULL,
+ XA_LIMIT(192, (1 << MINORBITS) - 1), GFP_KERNEL);
if (r < 0)
return r;

--
2.37.3

2022-09-06 21:07:59

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v4 1/3] drm: Use XArray instead of IDR for minors

On Tue, Sep 06, 2022 at 10:16:27PM +0200, Michał Winiarski wrote:
> IDR is deprecated, and since XArray manages its own state with internal
> locking, it simplifies the locking on DRM side.
> Additionally, don't use the IRQ-safe variant, since operating on drm
> minor is not done in IRQ context.
>
> Signed-off-by: Michał Winiarski <[email protected]>
> Suggested-by: Matthew Wilcox <[email protected]>

I have a few questions, but I like where you're going.

> @@ -98,21 +98,18 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
> static void drm_minor_alloc_release(struct drm_device *dev, void *data)
> {
> struct drm_minor *minor = data;
> - unsigned long flags;
>
> WARN_ON(dev != minor->dev);
>
> put_device(minor->kdev);
>
> - spin_lock_irqsave(&drm_minor_lock, flags);
> - idr_remove(&drm_minors_idr, minor->index);
> - spin_unlock_irqrestore(&drm_minor_lock, flags);
> + xa_release(&drm_minors_xa, minor->index);

Has it definitely been unused at this point? I would think that
xa_erase() (an unconditional store) would be the correct function to
call.

> @@ -122,20 +119,12 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
> minor->type = type;
> minor->dev = dev;
>
> - idr_preload(GFP_KERNEL);
> - spin_lock_irqsave(&drm_minor_lock, flags);
> - r = idr_alloc(&drm_minors_idr,
> - NULL,
> - 64 * type,
> - 64 * (type + 1),
> - GFP_NOWAIT);
> - spin_unlock_irqrestore(&drm_minor_lock, flags);
> - idr_preload_end();
> -
> + r = xa_alloc(&drm_minors_xa, &id, NULL,
> + XA_LIMIT(64 * type, 64 * (type + 1) - 1), GFP_KERNEL);
> if (r < 0)
> return r;
>
> - minor->index = r;
> + minor->index = id;

Wouldn't it be better to call:

r = xa_alloc(&drm_minors_xa, &minor->index, NULL,
XA_LIMIT(64 * type, 64 * (type + 1) - 1), GFP_KERNEL);

I might also prefer a little syntactic sugar like:

#define DRM_MINOR_LIMIT(type) XA_LIMIT(64 * (type), 64 * (type) + 63)

but that's definitely a matter of taste.

> @@ -172,9 +161,12 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
> goto err_debugfs;
>
> /* replace NULL with @minor so lookups will succeed from now on */
> - spin_lock_irqsave(&drm_minor_lock, flags);
> - idr_replace(&drm_minors_idr, minor, minor->index);
> - spin_unlock_irqrestore(&drm_minor_lock, flags);
> + entry = xa_store(&drm_minors_xa, minor->index, &minor, GFP_KERNEL);
> + if (xa_is_err(entry)) {
> + ret = xa_err(entry);
> + goto err_debugfs;
> + }
> + WARN_ON(entry);

Might be better as an xa_cmpxchg()?

> @@ -187,16 +179,13 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
> static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
> {
> struct drm_minor *minor;
> - unsigned long flags;
>
> minor = *drm_minor_get_slot(dev, type);
> if (!minor || !device_is_registered(minor->kdev))
> return;
>
> /* replace @minor with NULL so lookups will fail from now on */
> - spin_lock_irqsave(&drm_minor_lock, flags);
> - idr_replace(&drm_minors_idr, NULL, minor->index);
> - spin_unlock_irqrestore(&drm_minor_lock, flags);
> + xa_erase(&drm_minors_xa, minor->index);

This isn't an exact replacement, but I'm not sure whether that makes a
difference. xa_erase() allows allocation of this ID again while
idr_replace() means that lookups return NULL, but the ID remains in
use. The equivalent of idr_replace() is:
xa_store(&drm_minors_xa, minor->index, NULL, GFP_KERNEL);

> @@ -215,13 +204,10 @@ static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
> struct drm_minor *drm_minor_acquire(unsigned int minor_id)
> {
> struct drm_minor *minor;
> - unsigned long flags;
>
> - spin_lock_irqsave(&drm_minor_lock, flags);
> - minor = idr_find(&drm_minors_idr, minor_id);
> + minor = xa_load(&drm_minors_xa, minor_id);
> if (minor)
> drm_dev_get(minor->dev);
> - spin_unlock_irqrestore(&drm_minor_lock, flags);

This is also not an exact equivalent as the dev_drm_get() is now outside
the lock. Does that matter in this case? I don't know the code well
enough to say. If you want it to be equivalent, then:

xa_lock(&drm_minors_xa);
minor = xa_load(&drm_minors_xa, minor_id);
if (minor)
drm_dev_get(minor->dev);
xa_unlock(&drm_minors_xa);

would be the code to use.

> @@ -1037,7 +1023,7 @@ static void drm_core_exit(void)
> unregister_chrdev(DRM_MAJOR, "drm");
> debugfs_remove(drm_debugfs_root);
> drm_sysfs_destroy();
> - idr_destroy(&drm_minors_idr);
> + xa_destroy(&drm_minors_xa);

I don't know if this is the right call. xa_destroy() is the exact
replacement, but if the xarray should already be empty (and it should,
right?) then asserting the xa_empty() is true may be the better call
to make.


Phew, that was a lot of comments/questions. I hope that was useful!

2022-09-11 21:20:02

by Michał Winiarski

[permalink] [raw]
Subject: Re: [PATCH v4 1/3] drm: Use XArray instead of IDR for minors

On Tue, Sep 06, 2022 at 10:02:24PM +0100, Matthew Wilcox wrote:
> On Tue, Sep 06, 2022 at 10:16:27PM +0200, Michał Winiarski wrote:
> > IDR is deprecated, and since XArray manages its own state with internal
> > locking, it simplifies the locking on DRM side.
> > Additionally, don't use the IRQ-safe variant, since operating on drm
> > minor is not done in IRQ context.
> >
> > Signed-off-by: Michał Winiarski <[email protected]>
> > Suggested-by: Matthew Wilcox <[email protected]>
>
> I have a few questions, but I like where you're going.
>
> > @@ -98,21 +98,18 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
> > static void drm_minor_alloc_release(struct drm_device *dev, void *data)
> > {
> > struct drm_minor *minor = data;
> > - unsigned long flags;
> >
> > WARN_ON(dev != minor->dev);
> >
> > put_device(minor->kdev);
> >
> > - spin_lock_irqsave(&drm_minor_lock, flags);
> > - idr_remove(&drm_minors_idr, minor->index);
> > - spin_unlock_irqrestore(&drm_minor_lock, flags);
> > + xa_release(&drm_minors_xa, minor->index);
>
> Has it definitely been unused at this point? I would think that
> xa_erase() (an unconditional store) would be the correct function to
> call.

Yes, unless there's a programmers error somewhere - I'll replace it though.

>
> > @@ -122,20 +119,12 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
> > minor->type = type;
> > minor->dev = dev;
> >
> > - idr_preload(GFP_KERNEL);
> > - spin_lock_irqsave(&drm_minor_lock, flags);
> > - r = idr_alloc(&drm_minors_idr,
> > - NULL,
> > - 64 * type,
> > - 64 * (type + 1),
> > - GFP_NOWAIT);
> > - spin_unlock_irqrestore(&drm_minor_lock, flags);
> > - idr_preload_end();
> > -
> > + r = xa_alloc(&drm_minors_xa, &id, NULL,
> > + XA_LIMIT(64 * type, 64 * (type + 1) - 1), GFP_KERNEL);
> > if (r < 0)
> > return r;
> >
> > - minor->index = r;
> > + minor->index = id;
>
> Wouldn't it be better to call:
>
> r = xa_alloc(&drm_minors_xa, &minor->index, NULL,
> XA_LIMIT(64 * type, 64 * (type + 1) - 1), GFP_KERNEL);
>
> I might also prefer a little syntactic sugar like:
>
> #define DRM_MINOR_LIMIT(type) XA_LIMIT(64 * (type), 64 * (type) + 63)
>
> but that's definitely a matter of taste.

Sure.

>
> > @@ -172,9 +161,12 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
> > goto err_debugfs;
> >
> > /* replace NULL with @minor so lookups will succeed from now on */
> > - spin_lock_irqsave(&drm_minor_lock, flags);
> > - idr_replace(&drm_minors_idr, minor, minor->index);
> > - spin_unlock_irqrestore(&drm_minor_lock, flags);
> > + entry = xa_store(&drm_minors_xa, minor->index, &minor, GFP_KERNEL);
> > + if (xa_is_err(entry)) {
> > + ret = xa_err(entry);
> > + goto err_debugfs;
> > + }
> > + WARN_ON(entry);
>
> Might be better as an xa_cmpxchg()?

Ack.

>
> > @@ -187,16 +179,13 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
> > static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
> > {
> > struct drm_minor *minor;
> > - unsigned long flags;
> >
> > minor = *drm_minor_get_slot(dev, type);
> > if (!minor || !device_is_registered(minor->kdev))
> > return;
> >
> > /* replace @minor with NULL so lookups will fail from now on */
> > - spin_lock_irqsave(&drm_minor_lock, flags);
> > - idr_replace(&drm_minors_idr, NULL, minor->index);
> > - spin_unlock_irqrestore(&drm_minor_lock, flags);
> > + xa_erase(&drm_minors_xa, minor->index);
>
> This isn't an exact replacement, but I'm not sure whether that makes a
> difference. xa_erase() allows allocation of this ID again while
> idr_replace() means that lookups return NULL, but the ID remains in
> use. The equivalent of idr_replace() is:
> xa_store(&drm_minors_xa, minor->index, NULL, GFP_KERNEL);

It does makes a difference, I'll change it to the equivalent.

>
> > @@ -215,13 +204,10 @@ static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
> > struct drm_minor *drm_minor_acquire(unsigned int minor_id)
> > {
> > struct drm_minor *minor;
> > - unsigned long flags;
> >
> > - spin_lock_irqsave(&drm_minor_lock, flags);
> > - minor = idr_find(&drm_minors_idr, minor_id);
> > + minor = xa_load(&drm_minors_xa, minor_id);
> > if (minor)
> > drm_dev_get(minor->dev);
> > - spin_unlock_irqrestore(&drm_minor_lock, flags);
>
> This is also not an exact equivalent as the dev_drm_get() is now outside
> the lock. Does that matter in this case? I don't know the code well
> enough to say. If you want it to be equivalent, then:
>
> xa_lock(&drm_minors_xa);
> minor = xa_load(&drm_minors_xa, minor_id);
> if (minor)
> drm_dev_get(minor->dev);
> xa_unlock(&drm_minors_xa);
>
> would be the code to use.

Again, it does matter, I'll change it.

>
> > @@ -1037,7 +1023,7 @@ static void drm_core_exit(void)
> > unregister_chrdev(DRM_MAJOR, "drm");
> > debugfs_remove(drm_debugfs_root);
> > drm_sysfs_destroy();
> > - idr_destroy(&drm_minors_idr);
> > + xa_destroy(&drm_minors_xa);
>
> I don't know if this is the right call. xa_destroy() is the exact
> replacement, but if the xarray should already be empty (and it should,
> right?) then asserting the xa_empty() is true may be the better call
> to make.

Yeah - I'll replace it with WARN_ON.

>
>
> Phew, that was a lot of comments/questions. I hope that was useful!

Very useful indeed, thanks!

-Michał