In acpi_idle_do_entry(), an I/O port access is used as a dummy
wait to guarantee hardware behavior. However, it can trigger an
unnecessary VM exit if the kernel is running as a guest in a
virtualized environment.
In a virtualized environment, the operation that enters the deeper
C-state (inb()) traps to the hypervisor, so no dummy wait is needed
after the inb() call. Skip the dummy I/O port access in that case
to avoid the unnecessary VM exit.
Keep the dummy I/O port access on native (bare-metal) systems to
preserve the existing timing.
Signed-off-by: Yin Fengwei <[email protected]>
---
ChangeLog:
v3 -> v4:
- Drop the overengineered function pointer and instead check whether
we are running as a guest before the dummy inl() call.
v2 -> v3:
- Remove the dummy I/O port access entirely for virtualized environments.
v1 -> v2:
- Use ndelay() instead of a busy loop for the dummy delay.
drivers/acpi/processor_idle.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ed56c6d20b08..2ae95df2e74f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
return bm_status;
}
+static void wait_for_freeze(void)
+{
+#ifdef CONFIG_X86
+ /* No delay is needed if we are in guest */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return;
+#endif
+ /* Dummy wait op - must do something useless after P_LVL2 read
+ because chipsets cannot guarantee that STPCLK# signal
+ gets asserted in time to freeze execution properly. */
+ inl(acpi_gbl_FADT.xpm_timer_block.address);
+}
+
/**
* acpi_idle_do_entry - enter idle state using the appropriate method
* @cx: cstate data
@@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
} else {
/* IO port based C-state */
inb(cx->address);
- /* Dummy wait op - must do something useless after P_LVL2 read
- because chipsets cannot guarantee that STPCLK# signal
- gets asserted in time to freeze execution properly. */
- inl(acpi_gbl_FADT.xpm_timer_block.address);
+ wait_for_freeze();
}
}
@@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
safe_halt();
else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
inb(cx->address);
- /* See comment in acpi_idle_do_entry() */
- inl(acpi_gbl_FADT.xpm_timer_block.address);
+ wait_for_freeze();
} else
return -ENODEV;
}
--
2.19.1
On Thu, Oct 24, 2019 at 9:04 AM Yin Fengwei <[email protected]> wrote:
>
> In function acpi_idle_do_entry(), an ioport access is used for
> dummy wait to guarantee hardware behavior. But it could trigger
> unnecessary VMexit if kernel is running as guest in virtualization
> environment.
>
> If it's in virtualization environment, the deeper C state enter
> operation (inb()) will trap to hypervisor. It's not needed to do
> dummy wait after the inb() call. So we could just remove the
> dummy io port access to avoid unnecessary VMexit.
>
> And keep dummy io port access to maintain timing for native
> environment.
>
> Signed-off-by: Yin Fengwei <[email protected]>
> ---
> ChangeLog:
> v3 -> v4:
> - Drop overengineered function pointer and do check whether
> we are in guest before dummy inl call.
>
> v2 -> v3:
> - Remove dummy io port access totally for virtualization env.
>
> v1 -> v2:
> - Use ndelay instead of dead loop for dummy delay.
>
> drivers/acpi/processor_idle.c | 21 +++++++++++++++------
> 1 file changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
> index ed56c6d20b08..2ae95df2e74f 100644
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
> return bm_status;
> }
>
> +static void wait_for_freeze(void)
> +{
> +#ifdef CONFIG_X86
> + /* No delay is needed if we are in guest */
> + if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
> + return;
> +#endif
> + /* Dummy wait op - must do something useless after P_LVL2 read
> + because chipsets cannot guarantee that STPCLK# signal
> + gets asserted in time to freeze execution properly. */
> + inl(acpi_gbl_FADT.xpm_timer_block.address);
> +}
> +
> /**
> * acpi_idle_do_entry - enter idle state using the appropriate method
> * @cx: cstate data
> @@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
> } else {
> /* IO port based C-state */
> inb(cx->address);
> - /* Dummy wait op - must do something useless after P_LVL2 read
> - because chipsets cannot guarantee that STPCLK# signal
> - gets asserted in time to freeze execution properly. */
> - inl(acpi_gbl_FADT.xpm_timer_block.address);
> + wait_for_freeze();
> }
> }
>
> @@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
> safe_halt();
> else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
> inb(cx->address);
> - /* See comment in acpi_idle_do_entry() */
> - inl(acpi_gbl_FADT.xpm_timer_block.address);
> + wait_for_freeze();
> } else
> return -ENODEV;
> }
> --
Applying as 5.5 material, thanks!
On 10/25/2019 5:06 PM, Rafael J. Wysocki wrote:
> On Thu, Oct 24, 2019 at 9:04 AM Yin Fengwei <[email protected]> wrote:
>>
>> In function acpi_idle_do_entry(), an ioport access is used for
>> dummy wait to guarantee hardware behavior. But it could trigger
>> unnecessary VMexit if kernel is running as guest in virtualization
>> environment.
>>
>> If it's in virtualization environment, the deeper C state enter
>> operation (inb()) will trap to hypervisor. It's not needed to do
>> dummy wait after the inb() call. So we could just remove the
>> dummy io port access to avoid unnecessary VMexit.
>>
>> And keep dummy io port access to maintain timing for native
>> environment.
>>
>> Signed-off-by: Yin Fengwei <[email protected]>
>> ---
>> ChangeLog:
>> v3 -> v4:
>> - Drop overengineered function pointer and do check whether
>> we are in guest before dummy inl call.
>>
>> v2 -> v3:
>> - Remove dummy io port access totally for virtualization env.
>>
>> v1 -> v2:
>> - Use ndelay instead of dead loop for dummy delay.
>>
>> drivers/acpi/processor_idle.c | 21 +++++++++++++++------
>> 1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
>> index ed56c6d20b08..2ae95df2e74f 100644
>> --- a/drivers/acpi/processor_idle.c
>> +++ b/drivers/acpi/processor_idle.c
>> @@ -642,6 +642,19 @@ static int acpi_idle_bm_check(void)
>> return bm_status;
>> }
>>
>> +static void wait_for_freeze(void)
>> +{
>> +#ifdef CONFIG_X86
>> + /* No delay is needed if we are in guest */
>> + if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
>> + return;
>> +#endif
>> + /* Dummy wait op - must do something useless after P_LVL2 read
>> + because chipsets cannot guarantee that STPCLK# signal
>> + gets asserted in time to freeze execution properly. */
>> + inl(acpi_gbl_FADT.xpm_timer_block.address);
>> +}
>> +
>> /**
>> * acpi_idle_do_entry - enter idle state using the appropriate method
>> * @cx: cstate data
>> @@ -658,10 +671,7 @@ static void __cpuidle acpi_idle_do_entry(struct acpi_processor_cx *cx)
>> } else {
>> /* IO port based C-state */
>> inb(cx->address);
>> - /* Dummy wait op - must do something useless after P_LVL2 read
>> - because chipsets cannot guarantee that STPCLK# signal
>> - gets asserted in time to freeze execution properly. */
>> - inl(acpi_gbl_FADT.xpm_timer_block.address);
>> + wait_for_freeze();
>> }
>> }
>>
>> @@ -682,8 +692,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
>> safe_halt();
>> else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
>> inb(cx->address);
>> - /* See comment in acpi_idle_do_entry() */
>> - inl(acpi_gbl_FADT.xpm_timer_block.address);
>> + wait_for_freeze();
>> } else
>> return -ENODEV;
>> }
>> --
>
> Applying as 5.5 material, thanks!
Thanks a lot.
Regards
Yin, Fengwei
>