2021-03-30 22:10:57

by srinivas pandruvada

[permalink] [raw]
Subject: [PATCH] platform/x86: ISST: Account for increased timeout in some cases

In some cases when firmware is busy or updating, some mailbox commands
still timeout on some newer CPUs. To fix this issue, change how we
process timeout.

With this change, replaced timeout from using simple count with real
timeout in micro-seconds using ktime. When the command response takes
more than average processing time, yield to other tasks. The worst case
timeout is extended upto 1 milli-second.

Signed-off-by: Srinivas Pandruvada <[email protected]>
---
.../intel_speed_select_if/isst_if_mbox_pci.c | 33 +++++++++++++------
1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
index a2a2d923e60c..df1fc6c719f3 100644
--- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
+++ b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
@@ -21,12 +21,16 @@
#define PUNIT_MAILBOX_BUSY_BIT 31

/*
- * The average time to complete some commands is about 40us. The current
- * count is enough to satisfy 40us. But when the firmware is very busy, this
- * causes timeout occasionally. So increase to deal with some worst case
- * scenarios. Most of the command still complete in few us.
+ * The average time to complete mailbox commands is less than 40us. Most of
+ * the commands complete in few micro seconds. But the same firmware handles
+ * requests from all power management features.
+ * We can create a scenario where we flood the firmware with requests then
+ * the mailbox response can be delayed for 100s of micro seconds. So define
+ * two timeouts. One for average case and one for long.
+ * If the firmware is taking more than average, just call cond_resched().
*/
-#define OS_MAILBOX_RETRY_COUNT 100
+#define OS_MAILBOX_TIMEOUT_AVG_US 40
+#define OS_MAILBOX_TIMEOUT_MAX_US 1000

struct isst_if_device {
struct mutex mutex;
@@ -35,11 +39,13 @@ struct isst_if_device {
static int isst_if_mbox_cmd(struct pci_dev *pdev,
struct isst_if_mbox_cmd *mbox_cmd)
{
- u32 retries, data;
+ s64 tm_delta = 0;
+ ktime_t tm;
+ u32 data;
int ret;

/* Poll for rb bit == 0 */
- retries = OS_MAILBOX_RETRY_COUNT;
+ tm = ktime_get();
do {
ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
&data);
@@ -48,11 +54,14 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,

if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
ret = -EBUSY;
+ tm_delta = ktime_us_delta(ktime_get(), tm);
+ if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
+ cond_resched();
continue;
}
ret = 0;
break;
- } while (--retries);
+ } while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);

if (ret)
return ret;
@@ -74,7 +83,8 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
return ret;

/* Poll for rb bit == 0 */
- retries = OS_MAILBOX_RETRY_COUNT;
+ tm_delta = 0;
+ tm = ktime_get();
do {
ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
&data);
@@ -83,6 +93,9 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,

if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
ret = -EBUSY;
+ tm_delta = ktime_us_delta(ktime_get(), tm);
+ if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
+ cond_resched();
continue;
}

@@ -96,7 +109,7 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
mbox_cmd->resp_data = data;
ret = 0;
break;
- } while (--retries);
+ } while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);

return ret;
}
--
2.25.4


2021-04-07 20:58:24

by Hans de Goede

[permalink] [raw]
Subject: Re: [PATCH] platform/x86: ISST: Account for increased timeout in some cases

Hi,

On 3/31/21 12:08 AM, Srinivas Pandruvada wrote:
> In some cases when firmware is busy or updating, some mailbox commands
> still timeout on some newer CPUs. To fix this issue, change how we
> process timeout.
>
> With this change, replaced timeout from using simple count with real
> timeout in micro-seconds using ktime. When the command response takes
> more than average processing time, yield to other tasks. The worst case
> timeout is extended upto 1 milli-second.
>
> Signed-off-by: Srinivas Pandruvada <[email protected]>

Thank you for your patch, I've applied this patch to my review-hans
branch:
https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans

Note it will show up in my review-hans branch once I've pushed my
local branch there, which might take a while.

Once I've run some tests on this branch the patches there will be
added to the platform-drivers-x86/for-next branch and eventually
will be included in the pdx86 pull-request to Linus for the next
merge-window.

Regards,

Hans


> ---
> .../intel_speed_select_if/isst_if_mbox_pci.c | 33 +++++++++++++------
> 1 file changed, 23 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
> index a2a2d923e60c..df1fc6c719f3 100644
> --- a/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
> +++ b/drivers/platform/x86/intel_speed_select_if/isst_if_mbox_pci.c
> @@ -21,12 +21,16 @@
> #define PUNIT_MAILBOX_BUSY_BIT 31
>
> /*
> - * The average time to complete some commands is about 40us. The current
> - * count is enough to satisfy 40us. But when the firmware is very busy, this
> - * causes timeout occasionally. So increase to deal with some worst case
> - * scenarios. Most of the command still complete in few us.
> + * The average time to complete mailbox commands is less than 40us. Most of
> + * the commands complete in few micro seconds. But the same firmware handles
> + * requests from all power management features.
> + * We can create a scenario where we flood the firmware with requests then
> + * the mailbox response can be delayed for 100s of micro seconds. So define
> + * two timeouts. One for average case and one for long.
> + * If the firmware is taking more than average, just call cond_resched().
> */
> -#define OS_MAILBOX_RETRY_COUNT 100
> +#define OS_MAILBOX_TIMEOUT_AVG_US 40
> +#define OS_MAILBOX_TIMEOUT_MAX_US 1000
>
> struct isst_if_device {
> struct mutex mutex;
> @@ -35,11 +39,13 @@ struct isst_if_device {
> static int isst_if_mbox_cmd(struct pci_dev *pdev,
> struct isst_if_mbox_cmd *mbox_cmd)
> {
> - u32 retries, data;
> + s64 tm_delta = 0;
> + ktime_t tm;
> + u32 data;
> int ret;
>
> /* Poll for rb bit == 0 */
> - retries = OS_MAILBOX_RETRY_COUNT;
> + tm = ktime_get();
> do {
> ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
> &data);
> @@ -48,11 +54,14 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
>
> if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
> ret = -EBUSY;
> + tm_delta = ktime_us_delta(ktime_get(), tm);
> + if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
> + cond_resched();
> continue;
> }
> ret = 0;
> break;
> - } while (--retries);
> + } while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);
>
> if (ret)
> return ret;
> @@ -74,7 +83,8 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
> return ret;
>
> /* Poll for rb bit == 0 */
> - retries = OS_MAILBOX_RETRY_COUNT;
> + tm_delta = 0;
> + tm = ktime_get();
> do {
> ret = pci_read_config_dword(pdev, PUNIT_MAILBOX_INTERFACE,
> &data);
> @@ -83,6 +93,9 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
>
> if (data & BIT_ULL(PUNIT_MAILBOX_BUSY_BIT)) {
> ret = -EBUSY;
> + tm_delta = ktime_us_delta(ktime_get(), tm);
> + if (tm_delta > OS_MAILBOX_TIMEOUT_AVG_US)
> + cond_resched();
> continue;
> }
>
> @@ -96,7 +109,7 @@ static int isst_if_mbox_cmd(struct pci_dev *pdev,
> mbox_cmd->resp_data = data;
> ret = 0;
> break;
> - } while (--retries);
> + } while (tm_delta < OS_MAILBOX_TIMEOUT_MAX_US);
>
> return ret;
> }
>