2024-04-20 02:33:15

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH 0/3] drm/msm/mdp4: fix probe deferral issues

While testing MDP4 LVDS support I noticed several issues (two are
related to probe deferral case and last one is a c&p error in LCDC
part). Fix those issues.

Signed-off-by: Dmitry Baryshkov <[email protected]>
---
Dmitry Baryshkov (3):
drm/msm: don't clean up priv->kms prematurely
drm/msm/mdp4: don't destroy mdp4_kms in mdp4_kms_init error path
drm/msm/mdp4: correct LCDC regulator name

drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 28 ++++++++---------------
drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c | 2 +-
drivers/gpu/drm/msm/msm_kms.c | 1 -
3 files changed, 10 insertions(+), 21 deletions(-)
---
base-commit: 7b4f2bc91c15fdcf948bb2d9741a9d7d54303f8d
change-id: 20240420-mdp4-fixes-f33b5a21308b

Best regards,
--
Dmitry Baryshkov <[email protected]>



2024-04-20 02:33:21

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH 1/3] drm/msm: don't clean up priv->kms prematurely

MSM display drivers provide kms structure allocated during probe().
Don't clean up priv->kms field in case of an error. Otherwise probe
functions might fail after KMS probe deferral.

Fixes: a2ab5d5bb6b1 ("drm/msm: allow passing struct msm_kms to msm_drv_probe()")
Signed-off-by: Dmitry Baryshkov <[email protected]>
---
drivers/gpu/drm/msm/msm_kms.c | 1 -
1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
index af6a6fcb1173..6749f0fbca96 100644
--- a/drivers/gpu/drm/msm/msm_kms.c
+++ b/drivers/gpu/drm/msm/msm_kms.c
@@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv)
ret = priv->kms_init(ddev);
if (ret) {
DRM_DEV_ERROR(dev, "failed to load kms\n");
- priv->kms = NULL;
return ret;
}


--
2.39.2


2024-04-20 02:33:32

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH 2/3] drm/msm/mdp4: don't destroy mdp4_kms in mdp4_kms_init error path

Since commit 3c74682637e6 ("drm/msm/mdp4: move resource allocation to
the _probe function") the mdp4_kms data is allocated during probe. It is
an error to destroy it during mdp4_kms_init(), as the data is still
referenced by the drivers's data and can be used later in case of probe
deferral.

Fixes: 3c74682637e6 ("drm/msm/mdp4: move resource allocation to the _probe function")
Signed-off-by: Dmitry Baryshkov <[email protected]>
---
drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 28 +++++++++-------------------
1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
index 4ba1cb74ad76..4c5f72b7e0e5 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
@@ -392,7 +392,7 @@ static int mdp4_kms_init(struct drm_device *dev)
ret = mdp_kms_init(&mdp4_kms->base, &kms_funcs);
if (ret) {
DRM_DEV_ERROR(dev->dev, "failed to init kms\n");
- goto fail;
+ return ret;
}

kms = priv->kms;
@@ -403,7 +403,7 @@ static int mdp4_kms_init(struct drm_device *dev)
ret = regulator_enable(mdp4_kms->vdd);
if (ret) {
DRM_DEV_ERROR(dev->dev, "failed to enable regulator vdd: %d\n", ret);
- goto fail;
+ return ret;
}
}

@@ -414,8 +414,7 @@ static int mdp4_kms_init(struct drm_device *dev)
if (major != 4) {
DRM_DEV_ERROR(dev->dev, "unexpected MDP version: v%d.%d\n",
major, minor);
- ret = -ENXIO;
- goto fail;
+ return -ENXIO;
}

mdp4_kms->rev = minor;
@@ -423,8 +422,7 @@ static int mdp4_kms_init(struct drm_device *dev)
if (mdp4_kms->rev >= 2) {
if (!mdp4_kms->lut_clk) {
DRM_DEV_ERROR(dev->dev, "failed to get lut_clk\n");
- ret = -ENODEV;
- goto fail;
+ return -ENODEV;
}
clk_set_rate(mdp4_kms->lut_clk, max_clk);
}
@@ -445,8 +443,7 @@ static int mdp4_kms_init(struct drm_device *dev)

mmu = msm_iommu_new(&pdev->dev, 0);
if (IS_ERR(mmu)) {
- ret = PTR_ERR(mmu);
- goto fail;
+ return PTR_ERR(mmu);
} else if (!mmu) {
DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys "
"contig buffers for scanout\n");
@@ -458,8 +455,7 @@ static int mdp4_kms_init(struct drm_device *dev)
if (IS_ERR(aspace)) {
if (!IS_ERR(mmu))
mmu->funcs->destroy(mmu);
- ret = PTR_ERR(aspace);
- goto fail;
+ return PTR_ERR(aspace);
}

kms->aspace = aspace;
@@ -468,7 +464,7 @@ static int mdp4_kms_init(struct drm_device *dev)
ret = modeset_init(mdp4_kms);
if (ret) {
DRM_DEV_ERROR(dev->dev, "modeset_init failed: %d\n", ret);
- goto fail;
+ return ret;
}

mdp4_kms->blank_cursor_bo = msm_gem_new(dev, SZ_16K, MSM_BO_WC | MSM_BO_SCANOUT);
@@ -476,14 +472,14 @@ static int mdp4_kms_init(struct drm_device *dev)
ret = PTR_ERR(mdp4_kms->blank_cursor_bo);
DRM_DEV_ERROR(dev->dev, "could not allocate blank-cursor bo: %d\n", ret);
mdp4_kms->blank_cursor_bo = NULL;
- goto fail;
+ return ret;
}

ret = msm_gem_get_and_pin_iova(mdp4_kms->blank_cursor_bo, kms->aspace,
&mdp4_kms->blank_cursor_iova);
if (ret) {
DRM_DEV_ERROR(dev->dev, "could not pin blank-cursor bo: %d\n", ret);
- goto fail;
+ return ret;
}

dev->mode_config.min_width = 0;
@@ -492,12 +488,6 @@ static int mdp4_kms_init(struct drm_device *dev)
dev->mode_config.max_height = 2048;

return 0;
-
-fail:
- if (kms)
- mdp4_destroy(kms);
-
- return ret;
}

static const struct dev_pm_ops mdp4_pm_ops = {

--
2.39.2


2024-04-20 02:33:48

by Dmitry Baryshkov

[permalink] [raw]
Subject: [PATCH 3/3] drm/msm/mdp4: correct LCDC regulator name

Correct c&p error from the conversion of LCDC regulators to the bulk
API.

Fixes: 54f1fbcb47d4 ("drm/msm/mdp4: use bulk regulators API for LCDC encoder")
Signed-off-by: Dmitry Baryshkov <[email protected]>
---
drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
index 576995ddce37..8bbc7fb881d5 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c
@@ -389,7 +389,7 @@ struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,

/* TODO: different regulators in other cases? */
mdp4_lcdc_encoder->regs[0].supply = "lvds-vccs-3p3v";
- mdp4_lcdc_encoder->regs[1].supply = "lvds-vccs-3p3v";
+ mdp4_lcdc_encoder->regs[1].supply = "lvds-pll-vdda";
mdp4_lcdc_encoder->regs[2].supply = "lvds-vdda";

ret = devm_regulator_bulk_get(dev->dev,

--
2.39.2


2024-04-20 23:02:19

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/msm: don't clean up priv->kms prematurely



On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
> MSM display drivers provide kms structure allocated during probe().
> Don't clean up priv->kms field in case of an error. Otherwise probe
> functions might fail after KMS probe deferral.
>

So just to understand this more, this will happen when master component
probe (dpu) succeeded but other sub-component probe (dsi) deferred?

Because if master component probe itself deferred it will allocate
priv->kms again isnt it and we will not even hit here.

> Fixes: a2ab5d5bb6b1 ("drm/msm: allow passing struct msm_kms to msm_drv_probe()")
> Signed-off-by: Dmitry Baryshkov <[email protected]>
> ---
> drivers/gpu/drm/msm/msm_kms.c | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
> index af6a6fcb1173..6749f0fbca96 100644
> --- a/drivers/gpu/drm/msm/msm_kms.c
> +++ b/drivers/gpu/drm/msm/msm_kms.c
> @@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv)
> ret = priv->kms_init(ddev);
> if (ret) {
> DRM_DEV_ERROR(dev, "failed to load kms\n");
> - priv->kms = NULL;
> return ret;
> }
>
>

2024-04-20 23:05:26

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH 3/3] drm/msm/mdp4: correct LCDC regulator name



On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
> Correct c&p error from the conversion of LCDC regulators to the bulk
> API.
>
> Fixes: 54f1fbcb47d4 ("drm/msm/mdp4: use bulk regulators API for LCDC encoder")
> Signed-off-by: Dmitry Baryshkov <[email protected]>
> ---
> drivers/gpu/drm/msm/disp/mdp4/mdp4_lcdc_encoder.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>

Indeed ! I should have caught this during review :(

Reviewed-by: Abhinav Kumar <[email protected]>

2024-04-21 22:35:35

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/msm: don't clean up priv->kms prematurely

On Sat, Apr 20, 2024 at 04:02:00PM -0700, Abhinav Kumar wrote:
>
>
> On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
> > MSM display drivers provide kms structure allocated during probe().
> > Don't clean up priv->kms field in case of an error. Otherwise probe
> > functions might fail after KMS probe deferral.
> >
>
> So just to understand this more, this will happen when master component
> probe (dpu) succeeded but other sub-component probe (dsi) deferred?
>
> Because if master component probe itself deferred it will allocate priv->kms
> again isnt it and we will not even hit here.

Master probing succeeds (so priv->kms is set), then kms_init fails at
runtime, during binding of the master device. This results in probe
deferral from the last component's component_add() function and reprobe
attempt when possible (once the next device is added or probed). However
as priv->kms is NULL, probe crashes.

>
> > Fixes: a2ab5d5bb6b1 ("drm/msm: allow passing struct msm_kms to msm_drv_probe()")
> > Signed-off-by: Dmitry Baryshkov <[email protected]>
> > ---
> > drivers/gpu/drm/msm/msm_kms.c | 1 -
> > 1 file changed, 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
> > index af6a6fcb1173..6749f0fbca96 100644
> > --- a/drivers/gpu/drm/msm/msm_kms.c
> > +++ b/drivers/gpu/drm/msm/msm_kms.c
> > @@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv)
> > ret = priv->kms_init(ddev);
> > if (ret) {
> > DRM_DEV_ERROR(dev, "failed to load kms\n");
> > - priv->kms = NULL;
> > return ret;
> > }
> >

--
With best wishes
Dmitry

2024-04-22 16:14:27

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/msm: don't clean up priv->kms prematurely



On 4/21/2024 3:35 PM, Dmitry Baryshkov wrote:
> On Sat, Apr 20, 2024 at 04:02:00PM -0700, Abhinav Kumar wrote:
>>
>>
>> On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
>>> MSM display drivers provide kms structure allocated during probe().
>>> Don't clean up priv->kms field in case of an error. Otherwise probe
>>> functions might fail after KMS probe deferral.
>>>
>>
>> So just to understand this more, this will happen when master component
>> probe (dpu) succeeded but other sub-component probe (dsi) deferred?
>>
>> Because if master component probe itself deferred it will allocate priv->kms
>> again isnt it and we will not even hit here.
>
> Master probing succeeds (so priv->kms is set), then kms_init fails at
> runtime, during binding of the master device. This results in probe
> deferral from the last component's component_add() function and reprobe
> attempt when possible (once the next device is added or probed). However
> as priv->kms is NULL, probe crashes.
>

Got it, a better commit text would have helped here. Either way,

Reviewed-by: Abhinav Kumar <[email protected]>

2024-04-22 17:23:40

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/msm: don't clean up priv->kms prematurely



On 4/21/2024 3:35 PM, Dmitry Baryshkov wrote:
> On Sat, Apr 20, 2024 at 04:02:00PM -0700, Abhinav Kumar wrote:
>>
>>
>> On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
>>> MSM display drivers provide kms structure allocated during probe().
>>> Don't clean up priv->kms field in case of an error. Otherwise probe
>>> functions might fail after KMS probe deferral.
>>>
>>
>> So just to understand this more, this will happen when master component
>> probe (dpu) succeeded but other sub-component probe (dsi) deferred?
>>
>> Because if master component probe itself deferred it will allocate priv->kms
>> again isnt it and we will not even hit here.
>
> Master probing succeeds (so priv->kms is set), then kms_init fails at
> runtime, during binding of the master device. This results in probe
> deferral from the last component's component_add() function and reprobe
> attempt when possible (once the next device is added or probed). However
> as priv->kms is NULL, probe crashes.
>
>>
>>> Fixes: a2ab5d5bb6b1 ("drm/msm: allow passing struct msm_kms to msm_drv_probe()")

Actually, Is this Fixes tag correct?

OR is this one better

Fixes: 506efcba3129 ("drm/msm: carve out KMS code from msm_drv.c")


>>> Signed-off-by: Dmitry Baryshkov <[email protected]>
>>> ---
>>> drivers/gpu/drm/msm/msm_kms.c | 1 -
>>> 1 file changed, 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
>>> index af6a6fcb1173..6749f0fbca96 100644
>>> --- a/drivers/gpu/drm/msm/msm_kms.c
>>> +++ b/drivers/gpu/drm/msm/msm_kms.c
>>> @@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv)
>>> ret = priv->kms_init(ddev);
>>> if (ret) {
>>> DRM_DEV_ERROR(dev, "failed to load kms\n");
>>> - priv->kms = NULL;
>>> return ret;
>>> }
>>>
>

2024-04-22 18:18:01

by Abhinav Kumar

[permalink] [raw]
Subject: Re: [PATCH 2/3] drm/msm/mdp4: don't destroy mdp4_kms in mdp4_kms_init error path



On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
> Since commit 3c74682637e6 ("drm/msm/mdp4: move resource allocation to
> the _probe function") the mdp4_kms data is allocated during probe. It is
> an error to destroy it during mdp4_kms_init(), as the data is still
> referenced by the drivers's data and can be used later in case of probe
> deferral.
>

mdp4_destroy() currently detaches mmu, calls msm_kms_destroy() which
destroys pending timers and releases refcount on the aspace.

It does not touch the mdp4_kms as that one is devm managed.

In the comment
https://patchwork.freedesktop.org/patch/590411/?series=132664&rev=1#comment_1074306,
we had discussed that the last component's reprobe attempt is affected
(which is not dpu or mdp4 or mdp5 right? )

If it was an interface (such as DSI OR DP), is it the aspace detach
which is causing the crash?

Another note is, mdp5 needs the same fix in that case.

dpu_kms_init() looks fine.

> Fixes: 3c74682637e6 ("drm/msm/mdp4: move resource allocation to the _probe function")
> Signed-off-by: Dmitry Baryshkov <[email protected]>
> ---
> drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 28 +++++++++-------------------
> 1 file changed, 9 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
> index 4ba1cb74ad76..4c5f72b7e0e5 100644
> --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
> +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
> @@ -392,7 +392,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> ret = mdp_kms_init(&mdp4_kms->base, &kms_funcs);
> if (ret) {
> DRM_DEV_ERROR(dev->dev, "failed to init kms\n");
> - goto fail;
> + return ret;
> }
>
> kms = priv->kms;
> @@ -403,7 +403,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> ret = regulator_enable(mdp4_kms->vdd);
> if (ret) {
> DRM_DEV_ERROR(dev->dev, "failed to enable regulator vdd: %d\n", ret);
> - goto fail;
> + return ret;
> }
> }
>
> @@ -414,8 +414,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> if (major != 4) {
> DRM_DEV_ERROR(dev->dev, "unexpected MDP version: v%d.%d\n",
> major, minor);
> - ret = -ENXIO;
> - goto fail;
> + return -ENXIO;
> }
>
> mdp4_kms->rev = minor;
> @@ -423,8 +422,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> if (mdp4_kms->rev >= 2) {
> if (!mdp4_kms->lut_clk) {
> DRM_DEV_ERROR(dev->dev, "failed to get lut_clk\n");
> - ret = -ENODEV;
> - goto fail;
> + return -ENODEV;
> }
> clk_set_rate(mdp4_kms->lut_clk, max_clk);
> }
> @@ -445,8 +443,7 @@ static int mdp4_kms_init(struct drm_device *dev)
>
> mmu = msm_iommu_new(&pdev->dev, 0);
> if (IS_ERR(mmu)) {
> - ret = PTR_ERR(mmu);
> - goto fail;
> + return PTR_ERR(mmu);
> } else if (!mmu) {
> DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys "
> "contig buffers for scanout\n");
> @@ -458,8 +455,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> if (IS_ERR(aspace)) {
> if (!IS_ERR(mmu))
> mmu->funcs->destroy(mmu);
> - ret = PTR_ERR(aspace);
> - goto fail;
> + return PTR_ERR(aspace);
> }
>
> kms->aspace = aspace;
> @@ -468,7 +464,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> ret = modeset_init(mdp4_kms);
> if (ret) {
> DRM_DEV_ERROR(dev->dev, "modeset_init failed: %d\n", ret);
> - goto fail;
> + return ret;
> }
>
> mdp4_kms->blank_cursor_bo = msm_gem_new(dev, SZ_16K, MSM_BO_WC | MSM_BO_SCANOUT);
> @@ -476,14 +472,14 @@ static int mdp4_kms_init(struct drm_device *dev)
> ret = PTR_ERR(mdp4_kms->blank_cursor_bo);
> DRM_DEV_ERROR(dev->dev, "could not allocate blank-cursor bo: %d\n", ret);
> mdp4_kms->blank_cursor_bo = NULL;
> - goto fail;
> + return ret;
> }
>
> ret = msm_gem_get_and_pin_iova(mdp4_kms->blank_cursor_bo, kms->aspace,
> &mdp4_kms->blank_cursor_iova);
> if (ret) {
> DRM_DEV_ERROR(dev->dev, "could not pin blank-cursor bo: %d\n", ret);
> - goto fail;
> + return ret;
> }
>
> dev->mode_config.min_width = 0;
> @@ -492,12 +488,6 @@ static int mdp4_kms_init(struct drm_device *dev)
> dev->mode_config.max_height = 2048;
>
> return 0;
> -
> -fail:
> - if (kms)
> - mdp4_destroy(kms);
> -
> - return ret;
> }
>
> static const struct dev_pm_ops mdp4_pm_ops = {
>

2024-04-22 19:44:35

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/msm: don't clean up priv->kms prematurely

On Mon, Apr 22, 2024 at 10:23:18AM -0700, Abhinav Kumar wrote:
>
>
> On 4/21/2024 3:35 PM, Dmitry Baryshkov wrote:
> > On Sat, Apr 20, 2024 at 04:02:00PM -0700, Abhinav Kumar wrote:
> > >
> > >
> > > On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
> > > > MSM display drivers provide kms structure allocated during probe().
> > > > Don't clean up priv->kms field in case of an error. Otherwise probe
> > > > functions might fail after KMS probe deferral.
> > > >
> > >
> > > So just to understand this more, this will happen when master component
> > > probe (dpu) succeeded but other sub-component probe (dsi) deferred?
> > >
> > > Because if master component probe itself deferred it will allocate priv->kms
> > > again isnt it and we will not even hit here.
> >
> > Master probing succeeds (so priv->kms is set), then kms_init fails at
> > runtime, during binding of the master device. This results in probe
> > deferral from the last component's component_add() function and reprobe
> > attempt when possible (once the next device is added or probed). However
> > as priv->kms is NULL, probe crashes.
> >
> > >
> > > > Fixes: a2ab5d5bb6b1 ("drm/msm: allow passing struct msm_kms to msm_drv_probe()")
>
> Actually, Is this Fixes tag correct?
>
> OR is this one better
>
> Fixes: 506efcba3129 ("drm/msm: carve out KMS code from msm_drv.c")

No. The issue existed even before the carve-out.

>
>
> > > > Signed-off-by: Dmitry Baryshkov <[email protected]>
> > > > ---
> > > > drivers/gpu/drm/msm/msm_kms.c | 1 -
> > > > 1 file changed, 1 deletion(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/msm/msm_kms.c b/drivers/gpu/drm/msm/msm_kms.c
> > > > index af6a6fcb1173..6749f0fbca96 100644
> > > > --- a/drivers/gpu/drm/msm/msm_kms.c
> > > > +++ b/drivers/gpu/drm/msm/msm_kms.c
> > > > @@ -244,7 +244,6 @@ int msm_drm_kms_init(struct device *dev, const struct drm_driver *drv)
> > > > ret = priv->kms_init(ddev);
> > > > if (ret) {
> > > > DRM_DEV_ERROR(dev, "failed to load kms\n");
> > > > - priv->kms = NULL;
> > > > return ret;
> > > > }
> > > >
> >

--
With best wishes
Dmitry

2024-04-22 19:45:50

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/msm: don't clean up priv->kms prematurely

On Mon, Apr 22, 2024 at 09:12:20AM -0700, Abhinav Kumar wrote:
>
>
> On 4/21/2024 3:35 PM, Dmitry Baryshkov wrote:
> > On Sat, Apr 20, 2024 at 04:02:00PM -0700, Abhinav Kumar wrote:
> > >
> > >
> > > On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
> > > > MSM display drivers provide kms structure allocated during probe().
> > > > Don't clean up priv->kms field in case of an error. Otherwise probe
> > > > functions might fail after KMS probe deferral.
> > > >
> > >
> > > So just to understand this more, this will happen when master component
> > > probe (dpu) succeeded but other sub-component probe (dsi) deferred?
> > >
> > > Because if master component probe itself deferred it will allocate priv->kms
> > > again isnt it and we will not even hit here.
> >
> > Master probing succeeds (so priv->kms is set), then kms_init fails at
> > runtime, during binding of the master device. This results in probe
> > deferral from the last component's component_add() function and reprobe
> > attempt when possible (once the next device is added or probed). However
> > as priv->kms is NULL, probe crashes.
> >
>
> Got it, a better commit text would have helped here. Either way,

I'll update the commit text with the text above.

>
> Reviewed-by: Abhinav Kumar <[email protected]>

--
With best wishes
Dmitry

2024-04-22 19:46:26

by Dmitry Baryshkov

[permalink] [raw]
Subject: Re: [PATCH 2/3] drm/msm/mdp4: don't destroy mdp4_kms in mdp4_kms_init error path

On Mon, Apr 22, 2024 at 11:17:15AM -0700, Abhinav Kumar wrote:
>
>
> On 4/19/2024 7:33 PM, Dmitry Baryshkov wrote:
> > Since commit 3c74682637e6 ("drm/msm/mdp4: move resource allocation to
> > the _probe function") the mdp4_kms data is allocated during probe. It is
> > an error to destroy it during mdp4_kms_init(), as the data is still
> > referenced by the drivers's data and can be used later in case of probe
> > deferral.
> >
>
> mdp4_destroy() currently detaches mmu, calls msm_kms_destroy() which
> destroys pending timers and releases refcount on the aspace.
>
> It does not touch the mdp4_kms as that one is devm managed.
>
> In the comment https://patchwork.freedesktop.org/patch/590411/?series=132664&rev=1#comment_1074306,
> we had discussed that the last component's reprobe attempt is affected
> (which is not dpu or mdp4 or mdp5 right? )
>
> If it was an interface (such as DSI OR DP), is it the aspace detach which is
> causing the crash?

I should have retained the trace log. I'll trigger the issue and post the trace.

>
> Another note is, mdp5 needs the same fix in that case.
>
> dpu_kms_init() looks fine.
>
> > Fixes: 3c74682637e6 ("drm/msm/mdp4: move resource allocation to the _probe function")
> > Signed-off-by: Dmitry Baryshkov <[email protected]>
> > ---
> > drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c | 28 +++++++++-------------------
> > 1 file changed, 9 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
> > index 4ba1cb74ad76..4c5f72b7e0e5 100644
> > --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
> > +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
> > @@ -392,7 +392,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> > ret = mdp_kms_init(&mdp4_kms->base, &kms_funcs);
> > if (ret) {
> > DRM_DEV_ERROR(dev->dev, "failed to init kms\n");
> > - goto fail;
> > + return ret;
> > }
> > kms = priv->kms;
> > @@ -403,7 +403,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> > ret = regulator_enable(mdp4_kms->vdd);
> > if (ret) {
> > DRM_DEV_ERROR(dev->dev, "failed to enable regulator vdd: %d\n", ret);
> > - goto fail;
> > + return ret;
> > }
> > }
> > @@ -414,8 +414,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> > if (major != 4) {
> > DRM_DEV_ERROR(dev->dev, "unexpected MDP version: v%d.%d\n",
> > major, minor);
> > - ret = -ENXIO;
> > - goto fail;
> > + return -ENXIO;
> > }
> > mdp4_kms->rev = minor;
> > @@ -423,8 +422,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> > if (mdp4_kms->rev >= 2) {
> > if (!mdp4_kms->lut_clk) {
> > DRM_DEV_ERROR(dev->dev, "failed to get lut_clk\n");
> > - ret = -ENODEV;
> > - goto fail;
> > + return -ENODEV;
> > }
> > clk_set_rate(mdp4_kms->lut_clk, max_clk);
> > }
> > @@ -445,8 +443,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> > mmu = msm_iommu_new(&pdev->dev, 0);
> > if (IS_ERR(mmu)) {
> > - ret = PTR_ERR(mmu);
> > - goto fail;
> > + return PTR_ERR(mmu);
> > } else if (!mmu) {
> > DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys "
> > "contig buffers for scanout\n");
> > @@ -458,8 +455,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> > if (IS_ERR(aspace)) {
> > if (!IS_ERR(mmu))
> > mmu->funcs->destroy(mmu);
> > - ret = PTR_ERR(aspace);
> > - goto fail;
> > + return PTR_ERR(aspace);
> > }
> > kms->aspace = aspace;
> > @@ -468,7 +464,7 @@ static int mdp4_kms_init(struct drm_device *dev)
> > ret = modeset_init(mdp4_kms);
> > if (ret) {
> > DRM_DEV_ERROR(dev->dev, "modeset_init failed: %d\n", ret);
> > - goto fail;
> > + return ret;
> > }
> > mdp4_kms->blank_cursor_bo = msm_gem_new(dev, SZ_16K, MSM_BO_WC | MSM_BO_SCANOUT);
> > @@ -476,14 +472,14 @@ static int mdp4_kms_init(struct drm_device *dev)
> > ret = PTR_ERR(mdp4_kms->blank_cursor_bo);
> > DRM_DEV_ERROR(dev->dev, "could not allocate blank-cursor bo: %d\n", ret);
> > mdp4_kms->blank_cursor_bo = NULL;
> > - goto fail;
> > + return ret;
> > }
> > ret = msm_gem_get_and_pin_iova(mdp4_kms->blank_cursor_bo, kms->aspace,
> > &mdp4_kms->blank_cursor_iova);
> > if (ret) {
> > DRM_DEV_ERROR(dev->dev, "could not pin blank-cursor bo: %d\n", ret);
> > - goto fail;
> > + return ret;
> > }
> > dev->mode_config.min_width = 0;
> > @@ -492,12 +488,6 @@ static int mdp4_kms_init(struct drm_device *dev)
> > dev->mode_config.max_height = 2048;
> > return 0;
> > -
> > -fail:
> > - if (kms)
> > - mdp4_destroy(kms);
> > -
> > - return ret;
> > }
> > static const struct dev_pm_ops mdp4_pm_ops = {
> >

--
With best wishes
Dmitry