2022-10-01 01:49:54

by Danilo Krummrich

[permalink] [raw]
Subject: [PATCH drm-misc-next v3 0/7] drm/arm/hdlcd: use drm managed resources

Hi,

This patch series converts the driver to use drm managed resources to prevent
potential use-after-free issues on driver unbind/rebind and to get rid of the
usage of deprecated APIs.

Changes in v2:
- drop patch "drm/arm/hdlcd: crtc: use drmm_crtc_init_with_planes()"

Changes in v3:
- Fix alternate return paths in srcu read-side critical sections causing a
stall when unregistering the driver.
- Fix potential null pointer dereference in hdlcd_crtc_cleanup() introduced
dropping the patch in v2.
- Add a patch to remove explicit calls to drm_mode_config_cleanup().

Danilo Krummrich (7):
drm/arm/hdlcd: use drmm_* to allocate driver structures
drm/arm/hdlcd: replace drm->dev_private with drm_to_hdlcd_priv()
drm/arm/hdlcd: plane: use drm managed resources
drm/arm/hdlcd: use drm_dev_unplug()
drm/arm/hdlcd: crtc: protect device resources after removal
drm/arm/hdlcd: debugfs: protect device resources after removal
drm/arm/hdlcd: remove calls to drm_mode_config_cleanup()

drivers/gpu/drm/arm/hdlcd_crtc.c | 84 ++++++++++++++++++++++++--------
drivers/gpu/drm/arm/hdlcd_drv.c | 55 ++++++++++++---------
drivers/gpu/drm/arm/hdlcd_drv.h | 2 +
3 files changed, 98 insertions(+), 43 deletions(-)


base-commit: 08fb97de03aa2205c6791301bd83a095abc1949c
--
2.37.3


2022-10-01 01:51:50

by Danilo Krummrich

[permalink] [raw]
Subject: [PATCH drm-misc-next v3 2/7] drm/arm/hdlcd: replace drm->dev_private with drm_to_hdlcd_priv()

Using drm_device->dev_private is deprecated. Since we've switched to
devm_drm_dev_alloc(), struct drm_device is now embedded in struct
hdlcd_drm_private, hence we can use container_of() to get the struct
drm_device instance instead.

Signed-off-by: Danilo Krummrich <[email protected]>
---
drivers/gpu/drm/arm/hdlcd_crtc.c | 4 ++--
drivers/gpu/drm/arm/hdlcd_drv.c | 10 ++++------
drivers/gpu/drm/arm/hdlcd_drv.h | 1 +
3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
index 7030339fa232..4a8959d0b2a6 100644
--- a/drivers/gpu/drm/arm/hdlcd_crtc.c
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -275,7 +275,7 @@ static void hdlcd_plane_atomic_update(struct drm_plane *plane,
dest_h = drm_rect_height(&new_plane_state->dst);
scanout_start = drm_fb_dma_get_gem_addr(fb, new_plane_state, 0);

- hdlcd = plane->dev->dev_private;
+ hdlcd = drm_to_hdlcd_priv(plane->dev);
hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, fb->pitches[0]);
hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_PITCH, fb->pitches[0]);
hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_COUNT, dest_h - 1);
@@ -325,7 +325,7 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm)

int hdlcd_setup_crtc(struct drm_device *drm)
{
- struct hdlcd_drm_private *hdlcd = drm->dev_private;
+ struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
struct drm_plane *primary;
int ret;

diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 463381d11cff..120c87934a91 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -98,7 +98,7 @@ static void hdlcd_irq_uninstall(struct hdlcd_drm_private *hdlcd)

static int hdlcd_load(struct drm_device *drm, unsigned long flags)
{
- struct hdlcd_drm_private *hdlcd = drm->dev_private;
+ struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
struct platform_device *pdev = to_platform_device(drm->dev);
struct resource *res;
u32 version;
@@ -190,7 +190,7 @@ static int hdlcd_show_underrun_count(struct seq_file *m, void *arg)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *drm = node->minor->dev;
- struct hdlcd_drm_private *hdlcd = drm->dev_private;
+ struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);

seq_printf(m, "underrun : %d\n", atomic_read(&hdlcd->buffer_underrun_count));
seq_printf(m, "dma_end : %d\n", atomic_read(&hdlcd->dma_end_count));
@@ -203,7 +203,7 @@ static int hdlcd_show_pxlclock(struct seq_file *m, void *arg)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *drm = node->minor->dev;
- struct hdlcd_drm_private *hdlcd = drm->dev_private;
+ struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
unsigned long clkrate = clk_get_rate(hdlcd->clk);
unsigned long mode_clock = hdlcd->crtc.mode.crtc_clock * 1000;

@@ -253,7 +253,6 @@ static int hdlcd_drm_bind(struct device *dev)

drm = &hdlcd->base;

- drm->dev_private = hdlcd;
dev_set_drvdata(dev, drm);

hdlcd_setup_mode_config(drm);
@@ -324,7 +323,7 @@ static int hdlcd_drm_bind(struct device *dev)
static void hdlcd_drm_unbind(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
- struct hdlcd_drm_private *hdlcd = drm->dev_private;
+ struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);

drm_dev_unregister(drm);
drm_kms_helper_poll_fini(drm);
@@ -339,7 +338,6 @@ static void hdlcd_drm_unbind(struct device *dev)
pm_runtime_disable(dev);
of_reserved_mem_device_release(dev);
drm_mode_config_cleanup(drm);
- drm->dev_private = NULL;
dev_set_drvdata(dev, NULL);
}

diff --git a/drivers/gpu/drm/arm/hdlcd_drv.h b/drivers/gpu/drm/arm/hdlcd_drv.h
index 3892b36767ac..f1c1da2ac2db 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.h
+++ b/drivers/gpu/drm/arm/hdlcd_drv.h
@@ -21,6 +21,7 @@ struct hdlcd_drm_private {
#endif
};

+#define drm_to_hdlcd_priv(x) container_of(x, struct hdlcd_drm_private, base)
#define crtc_to_hdlcd_priv(x) container_of(x, struct hdlcd_drm_private, crtc)

static inline void hdlcd_write(struct hdlcd_drm_private *hdlcd,
--
2.37.3

2022-10-01 01:54:27

by Danilo Krummrich

[permalink] [raw]
Subject: [PATCH drm-misc-next v3 7/7] drm/arm/hdlcd: remove calls to drm_mode_config_cleanup()

drm_mode_config_init() simply calls drmm_mode_config_init(), hence
cleanup is automatically handled through registering
drm_mode_config_cleanup() with drmm_add_action_or_reset().

While at it, get rid of the deprecated drm_mode_config_init() and
replace it with drmm_mode_config_init() directly.

Signed-off-by: Danilo Krummrich <[email protected]>
---
drivers/gpu/drm/arm/hdlcd_drv.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 020c7d0c70a5..e242b6223d77 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -175,14 +175,21 @@ static const struct drm_mode_config_funcs hdlcd_mode_config_funcs = {
.atomic_commit = drm_atomic_helper_commit,
};

-static void hdlcd_setup_mode_config(struct drm_device *drm)
+static int hdlcd_setup_mode_config(struct drm_device *drm)
{
- drm_mode_config_init(drm);
+ int ret;
+
+ ret = drmm_mode_config_init(drm);
+ if (ret)
+ return ret;
+
drm->mode_config.min_width = 0;
drm->mode_config.min_height = 0;
drm->mode_config.max_width = HDLCD_MAX_XRES;
drm->mode_config.max_height = HDLCD_MAX_YRES;
drm->mode_config.funcs = &hdlcd_mode_config_funcs;
+
+ return 0;
}

#ifdef CONFIG_DEBUG_FS
@@ -263,7 +270,10 @@ static int hdlcd_drm_bind(struct device *dev)

dev_set_drvdata(dev, drm);

- hdlcd_setup_mode_config(drm);
+ ret = hdlcd_setup_mode_config(drm);
+ if (ret)
+ goto err_free;
+
ret = hdlcd_load(drm, 0);
if (ret)
goto err_free;
@@ -322,9 +332,7 @@ static int hdlcd_drm_bind(struct device *dev)
hdlcd_irq_uninstall(hdlcd);
of_reserved_mem_device_release(drm->dev);
err_free:
- drm_mode_config_cleanup(drm);
dev_set_drvdata(dev, NULL);
-
return ret;
}

@@ -345,7 +353,6 @@ static void hdlcd_drm_unbind(struct device *dev)
if (pm_runtime_enabled(dev))
pm_runtime_disable(dev);
of_reserved_mem_device_release(dev);
- drm_mode_config_cleanup(drm);
dev_set_drvdata(dev, NULL);
}

--
2.37.3

2022-10-01 01:55:57

by Danilo Krummrich

[permalink] [raw]
Subject: [PATCH drm-misc-next v3 6/7] drm/arm/hdlcd: debugfs: protect device resources after removal

(Hardware) resources which are bound to the driver and device lifecycle
must not be accessed after the device and driver are unbound.

However, the DRM device isn't freed as long as the last user didn't
close it, hence userspace can still call into the driver.

Therefore protect the critical sections which are accessing those
resources with drm_dev_enter() and drm_dev_exit().

Signed-off-by: Danilo Krummrich <[email protected]>
---
drivers/gpu/drm/arm/hdlcd_drv.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index e41def6d47cc..020c7d0c70a5 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -204,11 +204,19 @@ static int hdlcd_show_pxlclock(struct seq_file *m, void *arg)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *drm = node->minor->dev;
struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
- unsigned long clkrate = clk_get_rate(hdlcd->clk);
- unsigned long mode_clock = hdlcd->crtc.mode.crtc_clock * 1000;
+ unsigned long clkrate, mode_clock;
+ int idx;
+
+ if (!drm_dev_enter(drm, &idx))
+ return -ENODEV;
+
+ clkrate = clk_get_rate(hdlcd->clk);
+ mode_clock = hdlcd->crtc.mode.crtc_clock * 1000;

seq_printf(m, "hw : %lu\n", clkrate);
seq_printf(m, "mode: %lu\n", mode_clock);
+
+ drm_dev_exit(idx);
return 0;
}

--
2.37.3

2022-10-01 02:48:35

by Danilo Krummrich

[permalink] [raw]
Subject: [PATCH drm-misc-next v3 3/7] drm/arm/hdlcd: plane: use drm managed resources

Use drm managed resource allocation (drmm_universal_plane_alloc()) in
order to get rid of the explicit destroy hook in struct drm_plane_funcs.

Signed-off-by: Danilo Krummrich <[email protected]>
---
drivers/gpu/drm/arm/hdlcd_crtc.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
index 4a8959d0b2a6..1de0f7b23766 100644
--- a/drivers/gpu/drm/arm/hdlcd_crtc.c
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -290,7 +290,6 @@ static const struct drm_plane_helper_funcs hdlcd_plane_helper_funcs = {
static const struct drm_plane_funcs hdlcd_plane_funcs = {
.update_plane = drm_atomic_helper_update_plane,
.disable_plane = drm_atomic_helper_disable_plane,
- .destroy = drm_plane_cleanup,
.reset = drm_atomic_helper_plane_reset,
.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
@@ -298,24 +297,19 @@ static const struct drm_plane_funcs hdlcd_plane_funcs = {

static struct drm_plane *hdlcd_plane_init(struct drm_device *drm)
{
- struct hdlcd_drm_private *hdlcd = drm->dev_private;
+ struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
struct drm_plane *plane = NULL;
u32 formats[ARRAY_SIZE(supported_formats)], i;
- int ret;
-
- plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
- if (!plane)
- return ERR_PTR(-ENOMEM);

for (i = 0; i < ARRAY_SIZE(supported_formats); i++)
formats[i] = supported_formats[i].fourcc;

- ret = drm_universal_plane_init(drm, plane, 0xff, &hdlcd_plane_funcs,
- formats, ARRAY_SIZE(formats),
- NULL,
- DRM_PLANE_TYPE_PRIMARY, NULL);
- if (ret)
- return ERR_PTR(ret);
+ plane = drmm_universal_plane_alloc(drm, struct drm_plane, dev, 0xff,
+ &hdlcd_plane_funcs,
+ formats, ARRAY_SIZE(formats),
+ NULL, DRM_PLANE_TYPE_PRIMARY, NULL);
+ if (IS_ERR(plane))
+ return plane;

drm_plane_helper_add(plane, &hdlcd_plane_helper_funcs);
hdlcd->plane = plane;
--
2.37.3

2022-10-01 03:03:32

by Danilo Krummrich

[permalink] [raw]
Subject: [PATCH drm-misc-next v3 4/7] drm/arm/hdlcd: use drm_dev_unplug()

When the driver is unbound, there might still be users in userspace
having an open fd and are calling into the driver.

While this is fine for drm managed resources, it is not for resources
bound to the device/driver lifecycle, e.g. clocks or MMIO mappings.

To prevent use-after-free issues we need to protect those resources with
drm_dev_enter() and drm_dev_exit(). This does only work if we indicate
that the drm device was unplugged, hence use drm_dev_unplug() instead of
drm_dev_unregister().

Protecting the particular resources with drm_dev_enter()/drm_dev_exit()
is handled by subsequent patches.

Signed-off-by: Danilo Krummrich <[email protected]>
---
drivers/gpu/drm/arm/hdlcd_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 120c87934a91..e41def6d47cc 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -325,7 +325,7 @@ static void hdlcd_drm_unbind(struct device *dev)
struct drm_device *drm = dev_get_drvdata(dev);
struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);

- drm_dev_unregister(drm);
+ drm_dev_unplug(drm);
drm_kms_helper_poll_fini(drm);
component_unbind_all(dev, drm);
of_node_put(hdlcd->crtc.port);
--
2.37.3

2022-10-12 15:19:35

by Liviu Dudau

[permalink] [raw]
Subject: Re: [PATCH drm-misc-next v3 4/7] drm/arm/hdlcd: use drm_dev_unplug()

Hi Danilo,

Appologies again for the delay in reviewing this as I was at XDC last week.

This patch is causing a regression at 'rmmod' time as the drm_crtc_vblank_off() does
not get called when we disable outputs and the HDLCD remains active as I keep getting
unhandled context faults from the arm-smmu driver that sits in front of the HDLCD.

This is the stack trace for it:

root@alarm ~]# rmmod hdlcd
[ 198.981343] Console: switching to colour dummy device 80x25
[ 199.012492] ------------[ cut here ]------------
[ 199.017209] driver forgot to call drm_crtc_vblank_off()
[ 199.023513] WARNING: CPU: 5 PID: 176 at drivers/gpu/drm/drm_atomic_helper.c:1236 disable_outputs+0x2c4/0x2d0 [drm_kms_helper]
[ 199.035055] Modules linked in: hdlcd(-) drm_dma_helper tda998x cec drm_kms_helper drm
[ 199.042929] CPU: 5 PID: 176 Comm: kworker/5:2 Not tainted 6.0.0-rc2-00007-ge17e6f0211cd #6
[ 199.051212] Hardware name: ARM Juno development board (r2) (DT)
[ 199.057143] Workqueue: events drm_mode_rmfb_work_fn [drm]
[ 199.062831] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 199.069809] pc : disable_outputs+0x2c4/0x2d0 [drm_kms_helper]
[ 199.075664] lr : disable_outputs+0x2c4/0x2d0 [drm_kms_helper]
[ 199.081519] sp : ffff80000a8f3b60
[ 199.084836] x29: ffff80000a8f3b60 x28: 0000000000000028 x27: ffff0008013962b8
[ 199.091996] x26: ffff800001049688 x25: ffff800001080520 x24: 0000000000000038
[ 199.099155] x23: 0000000000000000 x22: ffff000801396000 x21: ffff0008013966f0
[ 199.106314] x20: 0000000000000000 x19: ffff00080a65a800 x18: 0000000000000000
[ 199.113472] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[ 199.120630] x14: 00000000000000e1 x13: 0000000000000000 x12: 0000000000000000
[ 199.127788] x11: 0000000000000001 x10: 0000000000000a60 x9 : ffff80000a8f3910
[ 199.134947] x8 : ffff00080149d480 x7 : ffff00097efb5bc0 x6 : 0000000000000083
[ 199.142106] x5 : 0000000000000000 x4 : ffff00097efaea18 x3 : ffff00097efb1c20
[ 199.149264] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff00080149c9c0
[ 199.156423] Call trace:
[ 199.158870] disable_outputs+0x2c4/0x2d0 [drm_kms_helper]
[ 199.164380] drm_atomic_helper_commit_tail+0x24/0xa0 [drm_kms_helper]
[ 199.170933] commit_tail+0x150/0x180 [drm_kms_helper]
[ 199.176093] drm_atomic_helper_commit+0x13c/0x370 [drm_kms_helper]
[ 199.182384] drm_atomic_commit+0xa4/0xe0 [drm]
[ 199.187074] drm_framebuffer_remove+0x434/0x4d4 [drm]
[ 199.192374] drm_mode_rmfb_work_fn+0x74/0x9c [drm]
[ 199.197413] process_one_work+0x1d4/0x330
[ 199.201437] worker_thread+0x220/0x430
[ 199.205195] kthread+0x108/0x10c
[ 199.208430] ret_from_fork+0x10/0x20
[ 199.212015] ---[ end trace 0000000000000000 ]---
[ 199.221088] arm-smmu 7fb10000.iommu: Unhandled context fault: fsr=0x2, iova=0xffa53600, fsynr=0x182, cbfrsynra=0x0, cb=0
[ 199.232958] arm-smmu 7fb10000.iommu: Unhandled context fault: fsr=0x2, iova=0xff800000, fsynr=0x182, cbfrsynra=0x0, cb=0
[ 199.233228] ------------[ cut here ]------------
[ 199.248618] hdlcd 7ff50000.hdlcd: drm_WARN_ON(({ do { __attribute__((__noreturn__)) extern void __compiletime_assert_384(void) __attribute__((__error__("Unsupported access size for {READ,WRITE}_ONCE()."))); if (!((sizeof(vblank->enabled) == sizeof(char) || sizeof(vblank->enabled) == sizeof(short) || sizeof(vblank->????
enabled) == sizeof(int) || sizeof(vblank->enabled) == sizeof(long)) || sizeof(vblank->enabled) == sizeof(long long))) __compiletime_assert_384(); } while (0); (*(const volatile typeof( _Generic((vblank->enabled), char: (char)0, unsigned char: (unsigned char)0, signed char: (signed char)0, unsigned short: (unsigned sho????
rt)0, signed short: (signed short)0, unsigned int: (unsigned int)0, signed int: (signed int)0, unsigned long: (unsigned long)0, signed long: (signed long)0, unsigned long long: (unsigned long long)0, signed long long: (signed long long)0, default: (vblank->enabled))) *)&(vblank->enabled)); }) && drm_core_check_feature????
(dev, DRIVER_MODESET))
[ 199.248790] WARNING: CPU: 4 PID: 285 at drivers/gpu/drm/drm_vblank.c:504 drm_vblank_init_release+0x84/0xb0 [drm]
[ 199.249751] arm-smmu 7fb10000.iommu: Unhandled context fault: fsr=0x2, iova=0xff800000, fsynr=0x182, cbfrsynra=0x0, cb=0
[ 199.291902] arm-scmi firmware:scmi: timed out in resp(caller: scmi_perf_level_set+0xfc/0x120)
[ 199.291926] cpufreq: __target_index: Failed to change cpu frequency: -110
[ 199.334654] Modules linked in: hdlcd(-)
[ 199.345063] scmi-cpufreq scmi_dev.2: Message for 857 type 0 is not expected!
[ 199.355734] drm_dma_helper
[ 199.371060] tda998x
[ 199.384738] cec
[ 199.408686] drm_kms_helper
[ 199.421404] drm
[ 199.436732]
[ 199.450409] CPU: 4 PID: 285 Comm: rmmod Tainted: G W 6.0.0-rc2-00007-ge17e6f0211cd #6
[ 199.475499] Hardware name: ARM Juno development board (r2) (DT)
[ 199.501541] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 199.508531] pc : drm_vblank_init_release+0x84/0xb0 [drm]
[ 199.514314] lr : drm_vblank_init_release+0x84/0xb0 [drm]
[ 199.519877] sp : ffff80000b183b30
[ 199.523195] x29: ffff80000b183b30 x28: ffff0008008b8000 x27: 0000000000000000
[ 199.530374] x26: 0000000000000000 x25: dead000000000100 x24: dead000000000122
[ 199.537550] x23: ffff000801396010 x22: ffff800001008010 x21: ffff000801396000
[ 199.544727] x20: ffff000801396000 x19: ffff000800398480 x18: fffffffffffeb478
[ 199.551904] x17: 000000040044ffff x16: 00400032b5503510 x15: 00000000000003d0
[ 199.559080] x14: 0000000000000001 x13: ffff800009f62f50 x12: 000000000000073e
[ 199.566256] x11: 000000000000026a x10: ffff800009fbe940 x9 : ffff800009f62f50
[ 199.573432] x8 : 00000000ffffefff x7 : ffff800009fbaf50 x6 : 000000000000026a
[ 199.580607] x5 : ffff00097ef9aa18 x4 : 0000000000000000 x3 : 0000000000000027
[ 199.587782] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0008008b8000
[ 199.594957] Call trace:
[ 199.597409] drm_vblank_init_release+0x84/0xb0 [drm]
[ 199.602823] drm_managed_release+0xa8/0x140 [drm]
[ 199.607973] drm_dev_put.part.0+0x78/0xb0 [drm]
[ 199.612946] devm_drm_dev_init_release+0x18/0x30 [drm]
[ 199.618529] devm_action_release+0x14/0x20
[ 199.622654] devres_release_group+0xe0/0x164
[ 199.626949] component_master_del+0xb0/0xc0
[ 199.631154] hdlcd_remove+0x1c/0x2c [hdlcd]
[ 199.635368] platform_remove+0x28/0x60
[ 199.639138] device_remove+0x4c/0x80
[ 199.642731] device_release_driver_internal+0x1e4/0x250
[ 199.647979] driver_detach+0x50/0xe0
[ 199.651572] bus_remove_driver+0x5c/0xbc
[ 199.655513] driver_unregister+0x30/0x60
[ 199.659454] platform_driver_unregister+0x14/0x20
[ 199.664181] hdlcd_platform_driver_exit+0x1c/0xe20 [hdlcd]
[ 199.669698] __arm64_sys_delete_module+0x18c/0x240
[ 199.674513] invoke_syscall+0x48/0x114
[ 199.678286] el0_svc_common.constprop.0+0xcc/0xec
[ 199.683015] do_el0_svc+0x2c/0xc0
[ 199.686350] el0_svc+0x2c/0x84
[ 199.689424] el0t_64_sync_handler+0x11c/0x150
[ 199.693803] el0t_64_sync+0x18c/0x190
[ 199.697485] ---[ end trace 0000000000000000 ]---


Looking at the documentation for drm_dev_unplug, you can get a hint about what is going on:

/*
* [....] There is one
* shortcoming however, drm_dev_unplug() marks the drm_device as unplugged before
* drm_atomic_helper_shutdown() is called. This means that if the disable code
* paths are protected, they will not run on regular driver module unload,
* possibly leaving the hardware enabled.
*/

I've reverted this patch and I can remove most of the time, but I also get this crash
from time to time:

[root@alarm ~]# rmmod hdlcd
[ 5442.625918] Console: switching to colour dummy device 80x25
[ 5442.662398] Unable to handle kernel paging request at virtual address ffff80000aa8c230
[ 5442.672018] Mem abort info:
[ 5442.675537] ESR = 0x0000000096000047
[ 5442.679654] EC = 0x25: DABT (current EL), IL = 32 bits
[ 5442.685355] SET = 0, FnV = 0
[ 5442.688759] EA = 0, S1PTW = 0
[ 5442.692122] FSC = 0x07: level 3 translation fault
[ 5442.697031] Data abort info:
[ 5442.699912] ISV = 0, ISS = 0x00000047
[ 5442.703766] CM = 0, WnR = 1
[ 5442.706749] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000081a7f000
[ 5442.713493] [ffff80000aa8c230] pgd=10000009fffff003, p4d=10000009fffff003, pud=10000009ffffe003, pmd=10000008811dd003, pte=0000000000000000
[ 5442.726214] Internal error: Oops: 96000047 [#1] PREEMPT SMP
[ 5442.731803] Modules linked in: psmouse libps2 onboard_usb_hub tda998x crct10dif_ce cec polyval_ce ambakmi polyval_generic serio hdlcd(-) drm_dma_helper arm_smccc_trng gpio_keys rng_core drm_kms_helper cfg80211 rfkill fuse drm ip_tables x_tables ipv6
[ 5442.753979] CPU: 0 PID: 274 Comm: rmmod Not tainted 6.0.0-rc2-00007-ge17e6f0211cd-dirty #7
[ 5442.760857] arm-scmi firmware:scmi: timed out in resp(caller: scmi_perf_level_set+0xfc/0x120)
[ 5442.762258] Hardware name: ARM Juno development board (r2) (DT)
[ 5442.762263] pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 5442.770822] cpufreq: __target_index: Failed to change cpu frequency: -110
[ 5442.776718] pc : hdlcd_crtc_cleanup+0x44/0x84 [hdlcd]
[ 5442.776735] lr : hdlcd_crtc_cleanup+0x30/0x84 [hdlcd]
[ 5442.776744] sp : ffff80000a963a60
[ 5442.803918] x29: ffff80000a963a60 x28: ffff00080240e740 x27: 0000000000000000
[ 5442.811071] x26: 0000000000000000 x25: dead000000000100 x24: dead000000000122
[ 5442.816720] arm-scmi firmware:scmi: timed out in resp(caller: scmi_perf_level_set+0xfc/0x120)
[ 5442.818222] x23: ffff000800d1f010 x22: ffff000800d1f2d8 x21: ffff000800d1f200
[ 5442.826803] cpufreq: __target_index: Failed to change cpu frequency: -110
[ 5442.833905] x20: ffff000800d1f000 x19: ffff000800d1f6f0 x18: ffffffffffffffff
[ 5442.833916] x17: 30647261632f6d72 x16: 642f64636c64682e x15: 0000000000000001
[ 5442.833926] x14: 0000000000000004 x13: ffff000800d1f1e8 x12: 0000000000000000
[ 5442.862146] x11: ffff0008005aec68 x10: ffff0008005aeb58 x9 : 0000000000000000
[ 5442.869296] x8 : ffff0008005aeb80 x7 : 0000000000000000 x6 : 0000000000000228
[ 5442.876446] x5 : 000000000000082a x4 : 0000000000000000 x3 : 0000000000000001
[ 5442.883595] x2 : ffff00080240e740 x1 : 0000000000000000 x0 : ffff80000aa8c230
[ 5442.890746] Call trace:
[ 5442.893189] hdlcd_crtc_cleanup+0x44/0x84 [hdlcd]
[ 5442.897902] drm_mode_config_cleanup+0x150/0x2fc [drm]
[ 5442.903160] drm_mode_config_init_release+0x10/0x20 [drm]
[ 5442.908671] drm_managed_release+0xa8/0x140 [drm]
[ 5442.913489] drm_dev_put.part.0+0x78/0xb0 [drm]
[ 5442.918131] devm_drm_dev_init_release+0x18/0x30 [drm]
[ 5442.923380] devm_action_release+0x14/0x20
[ 5442.927487] devres_release_group+0xe0/0x164
[ 5442.931763] component_master_del+0xb0/0xc0
[ 5442.935951] hdlcd_remove+0x1c/0x2c [hdlcd]
[ 5442.940142] platform_remove+0x28/0x60
[ 5442.943894] device_remove+0x4c/0x80
[ 5442.947471] device_release_driver_internal+0x1e4/0x250
[ 5442.952703] driver_detach+0x50/0xe0
[ 5442.956280] bus_remove_driver+0x5c/0xbc
[ 5442.960205] driver_unregister+0x30/0x60
[ 5442.964131] platform_driver_unregister+0x14/0x20
[ 5442.968840] hdlcd_platform_driver_exit+0x1c/0xe20 [hdlcd]
[ 5442.974335] __arm64_sys_delete_module+0x18c/0x240
[ 5442.979133] invoke_syscall+0x48/0x114
[ 5442.982887] el0_svc_common.constprop.0+0xcc/0xec
[ 5442.987597] do_el0_svc+0x2c/0xc0
[ 5442.990915] el0_svc+0x2c/0x84
[ 5442.993972] el0t_64_sync_handler+0x11c/0x150
[ 5442.998334] el0t_64_sync+0x18c/0x190
[ 5443.002001] Code: 540000e0 f85f0260 9108c000 d50332bf (b900001f)
[ 5443.008103] ---[ end trace 0000000000000000 ]---
[ 5443.012793] arm-scmi firmware:scmi: timed out in resp(caller: scmi_perf_level_set+0xfc/0x120)
[ 5443.021366] cpufreq: __target_index: Failed to change cpu frequency: -110


I'm finally getting to a conclusion: I'm still not sure what problem you were trying
to solve when you have started this series and if you have found a scenario where
you've got use after free then I would like to be able to reproduce it on my setup.
Otherwise, I think this whole series introduces a regression on the behaviour of the
driver and I would be inclined to reject it.

Best regards,
Liviu


On Sat, Oct 01, 2022 at 03:19:02AM +0200, Danilo Krummrich wrote:
> When the driver is unbound, there might still be users in userspace
> having an open fd and are calling into the driver.
>
> While this is fine for drm managed resources, it is not for resources
> bound to the device/driver lifecycle, e.g. clocks or MMIO mappings.
>
> To prevent use-after-free issues we need to protect those resources with
> drm_dev_enter() and drm_dev_exit(). This does only work if we indicate
> that the drm device was unplugged, hence use drm_dev_unplug() instead of
> drm_dev_unregister().
>
> Protecting the particular resources with drm_dev_enter()/drm_dev_exit()
> is handled by subsequent patches.
>
> Signed-off-by: Danilo Krummrich <[email protected]>
> ---
> drivers/gpu/drm/arm/hdlcd_drv.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
> index 120c87934a91..e41def6d47cc 100644
> --- a/drivers/gpu/drm/arm/hdlcd_drv.c
> +++ b/drivers/gpu/drm/arm/hdlcd_drv.c
> @@ -325,7 +325,7 @@ static void hdlcd_drm_unbind(struct device *dev)
> struct drm_device *drm = dev_get_drvdata(dev);
> struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
>
> - drm_dev_unregister(drm);
> + drm_dev_unplug(drm);
> drm_kms_helper_poll_fini(drm);
> component_unbind_all(dev, drm);
> of_node_put(hdlcd->crtc.port);
> --
> 2.37.3
>

--
====================
| I would like to |
| fix the world, |
| but they're not |
| giving me the |
\ source code! /
---------------
¯\_(ツ)_/¯

2022-10-14 00:15:30

by Danilo Krummrich

[permalink] [raw]
Subject: Re: [PATCH drm-misc-next v3 4/7] drm/arm/hdlcd: use drm_dev_unplug()

Hi Liviu,

On 10/12/22 17:07, Liviu Dudau wrote:
> Hi Danilo,
>
> Appologies again for the delay in reviewing this as I was at XDC last week.

No worries, thanks for following up.

> Looking at the documentation for drm_dev_unplug, you can get a hint about what is going on:
>
> /*
> * [....] There is one
> * shortcoming however, drm_dev_unplug() marks the drm_device as unplugged before
> * drm_atomic_helper_shutdown() is called. This means that if the disable code
> * paths are protected, they will not run on regular driver module unload,
> * possibly leaving the hardware enabled.
> */
>

Yes, that's the issue we have and pretty unfortunate. What we'd want for
platform device drivers is to still be able to enter the sections locked
with drm_dev_{enter,exit} on driver unbind, which we can't for at the
moment.

I discussed this with Daniel Vetter on #dri-devel and for now he
suggests to just not lock access to platform device bound resources and
respin the patchset removing those parts.

Besides that I'll also work on a solution for drm_dev_unplug() /
drm_dev_{enter,exit} to overcome this issue in the future.

> I'm finally getting to a conclusion: I'm still not sure what problem you were trying
> to solve when you have started this series and if you have found a scenario where
> you've got use after free then I would like to be able to reproduce it on my setup.
> Otherwise, I think this whole series introduces a regression on the behaviour of the
> driver and I would be inclined to reject it.

The problem is that DRM modeset objects should never be allocated with
devm_*, since this can result in use-after free issues on driver unload.
They should be freed when the last reference to the object is dropped,
which DRM managed APIs take care of. As a consequence, DRM managed
objects can potentially out-live platform device bound resources, which
then should be protected from access. The first at least we can and
should do.

It's not an issue that's unique to hdlcd, it's just a known issue to be
addressed by all drivers. There's still the shortcoming concerning
protecting platform bound resources as discussed above, but "the drmm
parts should be a good idea no matter what" to cite Daniel.

I'll send a v4 without the drm_dev_{enter,exit} parts removed if that's
fine for you.

- Danilo

>
> Best regards,
> Liviu

2022-10-14 11:39:57

by Liviu Dudau

[permalink] [raw]
Subject: Re: [PATCH drm-misc-next v3 4/7] drm/arm/hdlcd: use drm_dev_unplug()

On Fri, Oct 14, 2022 at 02:07:09AM +0200, Danilo Krummrich wrote:
> Hi Liviu,
>
> On 10/12/22 17:07, Liviu Dudau wrote:
> > Hi Danilo,
> >
> > Appologies again for the delay in reviewing this as I was at XDC last week.
>
> No worries, thanks for following up.
>
> > Looking at the documentation for drm_dev_unplug, you can get a hint about what is going on:
> >
> > /*
> > * [....] There is one
> > * shortcoming however, drm_dev_unplug() marks the drm_device as unplugged before
> > * drm_atomic_helper_shutdown() is called. This means that if the disable code
> > * paths are protected, they will not run on regular driver module unload,
> > * possibly leaving the hardware enabled.
> > */
> >
>
> Yes, that's the issue we have and pretty unfortunate. What we'd want for
> platform device drivers is to still be able to enter the sections locked
> with drm_dev_{enter,exit} on driver unbind, which we can't for at the
> moment.
>
> I discussed this with Daniel Vetter on #dri-devel and for now he suggests to
> just not lock access to platform device bound resources and respin the
> patchset removing those parts.
>
> Besides that I'll also work on a solution for drm_dev_unplug() /
> drm_dev_{enter,exit} to overcome this issue in the future.
>
> > I'm finally getting to a conclusion: I'm still not sure what problem you were trying
> > to solve when you have started this series and if you have found a scenario where
> > you've got use after free then I would like to be able to reproduce it on my setup.
> > Otherwise, I think this whole series introduces a regression on the behaviour of the
> > driver and I would be inclined to reject it.
>
> The problem is that DRM modeset objects should never be allocated with
> devm_*, since this can result in use-after free issues on driver unload.
> They should be freed when the last reference to the object is dropped, which
> DRM managed APIs take care of. As a consequence, DRM managed objects can
> potentially out-live platform device bound resources, which then should be
> protected from access. The first at least we can and should do.
>
> It's not an issue that's unique to hdlcd, it's just a known issue to be
> addressed by all drivers. There's still the shortcoming concerning
> protecting platform bound resources as discussed above, but "the drmm parts
> should be a good idea no matter what" to cite Daniel.
>
> I'll send a v4 without the drm_dev_{enter,exit} parts removed if that's fine
> for you.

Thanks for the description of the problem!

Also bear in mind that hdlcd and malidp use the component framework, which means that
the unbind is happening through the component_master_del() function.

I'm still keen to get a reproducer for the original issue of use-after free on hdlcd
(or malidp) that I can play with so that I can validate the final fix.

Best regards,
Liviu

>
> - Danilo
>
> >
> > Best regards,
> > Liviu
>

--
====================
| I would like to |
| fix the world, |
| but they're not |
| giving me the |
\ source code! /
---------------
¯\_(ツ)_/¯