From: Loic Poulain <[email protected]>
For whatever reason, some devices like QCA6390, WCN6855 using ath11k
are not in M3 state during PM resume, but still functional. The
mhi_pm_resume should then not fail in those cases, and let the higher
level device specific stack continue resuming process.
Add a new parameter to mhi_pm_resume, to force resuming, whatever the
current MHI state is. This fixes a regression with non functional
ath11k WiFi after suspend/resume cycle on some machines.
Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179
Cc: [email protected] #5.13
Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
Reported-by: Kalle Valo <[email protected]>
Reported-by: Pengyu Ma <[email protected]>
Signed-off-by: Loic Poulain <[email protected]>
[mani: Added comment, bug report, added reported-by tags and CCed stable]
Signed-off-by: Manivannan Sadhasivam <[email protected]>
---
drivers/bus/mhi/core/pm.c | 10 +++++++---
drivers/bus/mhi/pci_generic.c | 2 +-
drivers/net/wireless/ath/ath11k/mhi.c | 6 +++++-
include/linux/mhi.h | 3 ++-
4 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
index 7464f5d09973..4ddd266e042e 100644
--- a/drivers/bus/mhi/core/pm.c
+++ b/drivers/bus/mhi/core/pm.c
@@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
}
EXPORT_SYMBOL_GPL(mhi_pm_suspend);
-int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
+int mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
{
struct mhi_chan *itr, *tmp;
struct device *dev = &mhi_cntrl->mhi_dev->dev;
@@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
return -EIO;
- if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3)
- return -EINVAL;
+ if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
+ dev_warn(dev, "Resuming from non M3 state (%s)\n",
+ TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl)));
+ if (!force)
+ return -EINVAL;
+ }
/* Notify clients about exiting LPM */
list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 9ef41354237c..efd1da66fdf9 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -959,7 +959,7 @@ static int __maybe_unused mhi_pci_runtime_resume(struct device *dev)
return 0; /* Nothing to do at MHI level */
/* Exit M3, transition to M0 state */
- err = mhi_pm_resume(mhi_cntrl);
+ err = mhi_pm_resume(mhi_cntrl, false);
if (err) {
dev_err(&pdev->dev, "failed to resume device: %d\n", err);
goto err_recovery;
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 26c7ae242db6..f1f2fa2d690d 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -533,7 +533,11 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci,
ret = mhi_pm_suspend(ab_pci->mhi_ctrl);
break;
case ATH11K_MHI_RESUME:
- ret = mhi_pm_resume(ab_pci->mhi_ctrl);
+ /* Do force MHI resume as some devices like QCA6390, WCN6855
+ * are not in M3 state but they are functional. So just ignore
+ * the MHI state while resuming.
+ */
+ ret = mhi_pm_resume(ab_pci->mhi_ctrl, true);
break;
case ATH11K_MHI_TRIGGER_RDDM:
ret = mhi_force_rddm_mode(ab_pci->mhi_ctrl);
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 723985879035..102303288cee 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -660,8 +660,9 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl);
/**
* mhi_pm_resume - Resume MHI from suspended state
* @mhi_cntrl: MHI controller
+ * @force: Force resuming to M0 irrespective of the device MHI state
*/
-int mhi_pm_resume(struct mhi_controller *mhi_cntrl);
+int mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force);
/**
* mhi_download_rddm_image - Download ramdump image from device for
--
2.25.1
Hi, this is your Linux kernel regression tracker speaking.
On 06.12.21 17:10, Manivannan Sadhasivam wrote:
> From: Loic Poulain <[email protected]>
>
> For whatever reason, some devices like QCA6390, WCN6855 using ath11k
> are not in M3 state during PM resume, but still functional. The
> mhi_pm_resume should then not fail in those cases, and let the higher
> level device specific stack continue resuming process.
>
> Add a new parameter to mhi_pm_resume, to force resuming, whatever the
> current MHI state is. This fixes a regression with non functional
> ath11k WiFi after suspend/resume cycle on some machines.
>
> Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179
>
> Cc: [email protected] #5.13
> Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
> Reported-by: Kalle Valo <[email protected]>
> Reported-by: Pengyu Ma <[email protected]>
FWIW: In case you need to send an improved patch, could you please add
this before the 'Reported-by:' (see (¹) below for the reasoning):
Link: https://lore.kernel.org/regressions/[email protected]/
And if the patch is already good to go: could the subsystem maintainer
please add it when applying? See (¹) for the reasoning.
Thx.
Ciao, Thorsten, your Linux kernel regression tracker.
(¹) Long story: The commit message would benefit from a link to the
regression report on the mailing list, for reasons explained in
Documentation/process/submitting-patches.rst. To quote:
```
If related discussions or any other background information behind the
change can be found on the web, add 'Link:' tags pointing to it. In case
your patch fixes a bug, for example, add a tag with a URL referencing
the report in the mailing list archives or a bug tracker;
```
This concept is old, but the text was reworked recently to make this use
case for the Link: tag clearer. For details see:
https://git.kernel.org/linus/1f57bd42b77c
Yes, that "Link:" is not really crucial; but it's good to have if
someone needs to look into the backstory of this change sometime in the
future. But I care for a different reason. I'm tracking this regression
(and others) with regzbot, my Linux kernel regression tracking bot. This
bot will notice if a patch with a Link: tag to a tracked regression gets
posted and record that, which allows anyone looking into the regression
to quickly grasp the current status from regzbot's webui
(https://linux-regtracking.leemhuis.info/regzbot ) or its reports. The
bot will also notice if a commit with a Link: tag to a regression report
is applied by Linus and then automatically mark the regression as
resolved then.
IOW: this tag makes my life as a regression tracker a lot easier, as I
otherwise have to tell regzbot manually when the fix lands. :-/
P.S.: As a Linux kernel regression tracker I'm getting a lot of reports
on my table. I can only look briefly into most of them. Unfortunately
therefore I sometimes will get things wrong or miss something important.
I hope that's not the case here; if you think it is, don't hesitate to
tell me about it in a public reply. That's in everyone's interest, as
what I wrote above might be misleading to everyone reading this; any
suggestion I gave might thus send someone reading this down the
wrong rabbit hole, which none of us wants.
BTW, I have no personal interest in this issue, which is tracked using
regzbot, my Linux kernel regression tracking bot
(https://linux-regtracking.leemhuis.info/regzbot/). I'm only posting
this mail to get things rolling again and hence don't need to be CCed on
all further activities wrt this regression.
#regzbot ^backmonitor:
https://lore.kernel.org/regressions/[email protected]/
Manivannan Sadhasivam <[email protected]> writes:
> From: Loic Poulain <[email protected]>
>
> For whatever reason, some devices like QCA6390, WCN6855 using ath11k
> are not in M3 state during PM resume, but still functional. The
> mhi_pm_resume should then not fail in those cases, and let the higher
> level device specific stack continue resuming process.
>
> Add a new parameter to mhi_pm_resume, to force resuming, whatever the
> current MHI state is. This fixes a regression with non functional
> ath11k WiFi after suspend/resume cycle on some machines.
>
> Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179
>
> Cc: [email protected] #5.13
> Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
> Reported-by: Kalle Valo <[email protected]>
> Reported-by: Pengyu Ma <[email protected]>
> Signed-off-by: Loic Poulain <[email protected]>
> [mani: Added comment, bug report, added reported-by tags and CCed stable]
> Signed-off-by: Manivannan Sadhasivam <[email protected]>
Thanks! I now tested this patch on top of v5.16-rc4 using QCA6390 and
firmware WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1, no issues found:
Tested-by: Kalle Valo <[email protected]>
I'm not expecting any conflicts with ath11k, so please take this via the
mhi tree. It would be really good to get this regression fixed in v5.16,
so is it possible to send this to -rc releases?
For the ath11k part:
Acked-by: Kalle Valo <[email protected]>
--
https://patchwork.kernel.org/project/linux-wireless/list/
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
Hi Mani,
On 12/6/2021 8:10 AM, Manivannan Sadhasivam wrote:
> From: Loic Poulain <[email protected]>
>
> For whatever reason, some devices like QCA6390, WCN6855 using ath11k
> are not in M3 state during PM resume, but still functional. The
> mhi_pm_resume should then not fail in those cases, and let the higher
> level device specific stack continue resuming process.
>
> Add a new parameter to mhi_pm_resume, to force resuming, whatever the
> current MHI state is. This fixes a regression with non functional
> ath11k WiFi after suspend/resume cycle on some machines.
>
> Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179
>
> Cc: [email protected] #5.13
> Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
> Reported-by: Kalle Valo <[email protected]>
> Reported-by: Pengyu Ma <[email protected]>
> Signed-off-by: Loic Poulain <[email protected]>
> [mani: Added comment, bug report, added reported-by tags and CCed stable]
> Signed-off-by: Manivannan Sadhasivam <[email protected]>
> ---
> drivers/bus/mhi/core/pm.c | 10 +++++++---
> drivers/bus/mhi/pci_generic.c | 2 +-
> drivers/net/wireless/ath/ath11k/mhi.c | 6 +++++-
> include/linux/mhi.h | 3 ++-
> 4 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
> index 7464f5d09973..4ddd266e042e 100644
> --- a/drivers/bus/mhi/core/pm.c
> +++ b/drivers/bus/mhi/core/pm.c
> @@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
> }
> EXPORT_SYMBOL_GPL(mhi_pm_suspend);
>
> -int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
> +int mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
> {
> struct mhi_chan *itr, *tmp;
> struct device *dev = &mhi_cntrl->mhi_dev->dev;
> @@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
> if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
> return -EIO;
>
> - if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3)
> - return -EINVAL;
> + if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
In case mhi_get_mhi_state(mhi_cntrl) returns SYS_ERR (assuming SYS_ERR is
set while doing this check), do we still want to continue pm resume
when force is true? I just want to make sure SYS_ERR handling with and
without this change remains the same, or at least does not cause any
regression with this change. Or do we need to continue pm resume only
for MHI_STATE_RESET when MHI_STATE_M3 is not set?
> + dev_warn(dev, "Resuming from non M3 state (%s)\n",
> + TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl)));
> + if (!force)
> + return -EINVAL;
> + }
>
[..]
Thanks,
Hemant
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum, a Linux Foundation Collaborative Project
On Tue, Dec 07, 2021 at 03:41:42PM -0800, Hemant Kumar wrote:
> Hi Mani,
>
> On 12/6/2021 8:10 AM, Manivannan Sadhasivam wrote:
> > From: Loic Poulain <[email protected]>
> >
> > For whatever reason, some devices like QCA6390, WCN6855 using ath11k
> > are not in M3 state during PM resume, but still functional. The
> > mhi_pm_resume should then not fail in those cases, and let the higher
> > level device specific stack continue resuming process.
> >
> > Add a new parameter to mhi_pm_resume, to force resuming, whatever the
> > current MHI state is. This fixes a regression with non functional
> > ath11k WiFi after suspend/resume cycle on some machines.
> >
> > Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179
> >
> > Cc: [email protected] #5.13
> > Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
> > Reported-by: Kalle Valo <[email protected]>
> > Reported-by: Pengyu Ma <[email protected]>
> > Signed-off-by: Loic Poulain <[email protected]>
> > [mani: Added comment, bug report, added reported-by tags and CCed stable]
> > Signed-off-by: Manivannan Sadhasivam <[email protected]>
> > ---
> > drivers/bus/mhi/core/pm.c | 10 +++++++---
> > drivers/bus/mhi/pci_generic.c | 2 +-
> > drivers/net/wireless/ath/ath11k/mhi.c | 6 +++++-
> > include/linux/mhi.h | 3 ++-
> > 4 files changed, 15 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
> > index 7464f5d09973..4ddd266e042e 100644
> > --- a/drivers/bus/mhi/core/pm.c
> > +++ b/drivers/bus/mhi/core/pm.c
> > @@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
> > }
> > EXPORT_SYMBOL_GPL(mhi_pm_suspend);
> > -int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
> > +int mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
> > {
> > struct mhi_chan *itr, *tmp;
> > struct device *dev = &mhi_cntrl->mhi_dev->dev;
> > @@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
> > if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
> > return -EIO;
> > - if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3)
> > - return -EINVAL;
> > + if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
> in case if mhi_get_mhi_state(mhi_cntrl) returns SYS_ERR (assuming while
> doing this check SYS_ERR is set) do we still want to continue pm resume when
> force is true? Just want to make sure SYS_ERR handling with and without this
> change remains the same or atleast does not cause any regression with this
> change. or if we need to continue pm resume only for MHI_STATE_RESET when
> MHI_STATE_M3 is not set?
SYS_ERR state is a valid case while resuming from suspend. The "force" flag is
supposed to be used by controllers that go to a weird state like RESET. If we
just add check for RESET, then we might get another scenario in future. That's
why the "force" flag made sense to me.
If we want to handle SYS_ERR then we need to check for that before M3 and that
can be done in a separate patch. But since we didn't hit this scenario till now,
let's handle it later if needed.
Thanks,
Mani
> > + dev_warn(dev, "Resuming from non M3 state (%s)\n",
> > + TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl)));
> > + if (!force)
> > + return -EINVAL;
> > + }
> [..]
>
> Thanks,
> Hemant
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a
> Linux Foundation Collaborative Project
On Mon, Dec 06, 2021 at 09:40:59PM +0530, Manivannan Sadhasivam wrote:
> From: Loic Poulain <[email protected]>
>
> For whatever reason, some devices like QCA6390, WCN6855 using ath11k
> are not in M3 state during PM resume, but still functional. The
> mhi_pm_resume should then not fail in those cases, and let the higher
> level device specific stack continue resuming process.
>
> Add a new parameter to mhi_pm_resume, to force resuming, whatever the
> current MHI state is. This fixes a regression with non functional
> ath11k WiFi after suspend/resume cycle on some machines.
>
> Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179
>
> Cc: [email protected] #5.13
> Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
> Reported-by: Kalle Valo <[email protected]>
> Reported-by: Pengyu Ma <[email protected]>
> Signed-off-by: Loic Poulain <[email protected]>
> [mani: Added comment, bug report, added reported-by tags and CCed stable]
> Signed-off-by: Manivannan Sadhasivam <[email protected]>
Applied to mhi-fixes! Will be submitted for v5.16-rcX.
Thanks,
Mani
> ---
> drivers/bus/mhi/core/pm.c | 10 +++++++---
> drivers/bus/mhi/pci_generic.c | 2 +-
> drivers/net/wireless/ath/ath11k/mhi.c | 6 +++++-
> include/linux/mhi.h | 3 ++-
> 4 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
> index 7464f5d09973..4ddd266e042e 100644
> --- a/drivers/bus/mhi/core/pm.c
> +++ b/drivers/bus/mhi/core/pm.c
> @@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
> }
> EXPORT_SYMBOL_GPL(mhi_pm_suspend);
>
> -int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
> +int mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
> {
> struct mhi_chan *itr, *tmp;
> struct device *dev = &mhi_cntrl->mhi_dev->dev;
> @@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
> if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
> return -EIO;
>
> - if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3)
> - return -EINVAL;
> + if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
> + dev_warn(dev, "Resuming from non M3 state (%s)\n",
> + TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl)));
> + if (!force)
> + return -EINVAL;
> + }
>
> /* Notify clients about exiting LPM */
> list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
> diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
> index 9ef41354237c..efd1da66fdf9 100644
> --- a/drivers/bus/mhi/pci_generic.c
> +++ b/drivers/bus/mhi/pci_generic.c
> @@ -959,7 +959,7 @@ static int __maybe_unused mhi_pci_runtime_resume(struct device *dev)
> return 0; /* Nothing to do at MHI level */
>
> /* Exit M3, transition to M0 state */
> - err = mhi_pm_resume(mhi_cntrl);
> + err = mhi_pm_resume(mhi_cntrl, false);
> if (err) {
> dev_err(&pdev->dev, "failed to resume device: %d\n", err);
> goto err_recovery;
> diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
> index 26c7ae242db6..f1f2fa2d690d 100644
> --- a/drivers/net/wireless/ath/ath11k/mhi.c
> +++ b/drivers/net/wireless/ath/ath11k/mhi.c
> @@ -533,7 +533,11 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci,
> ret = mhi_pm_suspend(ab_pci->mhi_ctrl);
> break;
> case ATH11K_MHI_RESUME:
> - ret = mhi_pm_resume(ab_pci->mhi_ctrl);
> + /* Do force MHI resume as some devices like QCA6390, WCN6855
> + * are not in M3 state but they are functional. So just ignore
> + * the MHI state while resuming.
> + */
> + ret = mhi_pm_resume(ab_pci->mhi_ctrl, true);
> break;
> case ATH11K_MHI_TRIGGER_RDDM:
> ret = mhi_force_rddm_mode(ab_pci->mhi_ctrl);
> diff --git a/include/linux/mhi.h b/include/linux/mhi.h
> index 723985879035..102303288cee 100644
> --- a/include/linux/mhi.h
> +++ b/include/linux/mhi.h
> @@ -660,8 +660,9 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl);
> /**
> * mhi_pm_resume - Resume MHI from suspended state
> * @mhi_cntrl: MHI controller
> + * @force: Force resuming to M0 irrespective of the device MHI state
> */
> -int mhi_pm_resume(struct mhi_controller *mhi_cntrl);
> +int mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force);
>
> /**
> * mhi_download_rddm_image - Download ramdump image from device for
> --
> 2.25.1
>