This patchset enables the usage of the haltpoll governor on arm64. This is
particularly useful for KVM guests, as it reduces IPC latencies.
Here are some benchmarks without/with haltpoll for a KVM guest:
a) without haltpoll:
perf bench sched pipe
# Running 'sched/pipe' benchmark:
# Executed 1000000 pipe operations between two processes
Total time: 8.138 [sec]
8.138094 usecs/op
122878 ops/sec
b) with haltpoll:
perf bench sched pipe
# Running 'sched/pipe' benchmark:
# Executed 1000000 pipe operations between two processes
Total time: 5.003 [sec]
5.003085 usecs/op
199876 ops/sec
v4 changes from v3:
- change 7/8 per Rafael's input: drop the parens and use ret for the final check
- add 8/8 which renames the guard for building poll_state
v3 changes from v2:
- fix 1/7 per Petr Mladek - remove ARCH_HAS_CPU_RELAX from arch/x86/Kconfig
- add Acked-by from Rafael Wysocki on 2/7
v2 changes from v1:
- added patch 7 where we replace cpu_relax with smp_cond_load_relaxed per PeterZ
(this improves by 50% at least the CPU cycles consumed in the tests above:
10,716,881,137 now vs 14,503,014,257 before)
- removed the ifdef from patch 1 per RafaelW
Joao Martins (6):
x86: Move ARCH_HAS_CPU_RELAX to arch
x86/kvm: Move haltpoll_want() to be arch defined
governors/haltpoll: Drop kvm_para_available() check
arm64: Select ARCH_HAS_CPU_RELAX
arm64: Define TIF_POLLING_NRFLAG
cpuidle-haltpoll: ARM64 support
Mihai Carabas (2):
cpuidle/poll_state: replace cpu_relax with smp_cond_load_relaxed
cpuidle: replace ARCH_HAS_CPU_RELAX with ARCH_WANTS_IDLE_POLL
arch/Kconfig | 3 +++
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/thread_info.h | 6 ++++++
arch/x86/Kconfig | 4 +---
arch/x86/include/asm/cpuidle_haltpoll.h | 1 +
arch/x86/kernel/kvm.c | 10 ++++++++++
drivers/acpi/processor_idle.c | 4 ++--
drivers/cpuidle/Kconfig | 4 ++--
drivers/cpuidle/Makefile | 2 +-
drivers/cpuidle/cpuidle-haltpoll.c | 8 ++------
drivers/cpuidle/governors/haltpoll.c | 5 +----
drivers/cpuidle/poll_state.c | 15 ++++++++++-----
include/linux/cpuidle.h | 2 +-
include/linux/cpuidle_haltpoll.h | 5 +++++
14 files changed, 46 insertions(+), 24 deletions(-)
--
1.8.3.1
From: Joao Martins <[email protected]>
The default idle method for arm64 is WFI and it therefore
unconditionally requires the reschedule interrupt when idle.
Commit 842514849a61 ("arm64: Remove TIF_POLLING_NRFLAG") had
removed the flag because WFI was the only idle method. ARM64 support
for haltpoll means that poll_idle() requires TIF_POLLING_NRFLAG again,
so define it on arm64 *only if* haltpoll is built, using the same bit.
Signed-off-by: Joao Martins <[email protected]>
Signed-off-by: Mihai Carabas <[email protected]>
---
arch/arm64/include/asm/thread_info.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e72a3bf9e563..72273a2168fa 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -69,6 +69,9 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
#define TIF_SECCOMP 11 /* syscall secure computing */
#define TIF_SYSCALL_EMU 12 /* syscall emulation active */
+#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE) || IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE_MODULE)
+#define TIF_POLLING_NRFLAG 16 /* poll_idle() polls TIF_NEED_RESCHED */
+#endif
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
@@ -91,6 +94,9 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE) || IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE_MODULE)
+#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
+#endif
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
--
1.8.3.1
Replace ARCH_HAS_CPU_RELAX with ARCH_WANTS_IDLE_POLL for clarity as it controls
the building of poll_state.
Suggested-by: Will Deacon <[email protected]>
Signed-off-by: Ankur Arora <[email protected]>
Signed-off-by: Mihai Carabas <[email protected]>
---
arch/Kconfig | 2 +-
arch/arm64/Kconfig | 2 +-
arch/x86/Kconfig | 2 +-
drivers/acpi/processor_idle.c | 4 ++--
drivers/cpuidle/Makefile | 2 +-
include/linux/cpuidle.h | 2 +-
6 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 5b2e8a88853c..e7659a3a7d58 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1363,7 +1363,7 @@ config RELR
config ARCH_HAS_MEM_ENCRYPT
bool
-config ARCH_HAS_CPU_RELAX
+config ARCH_WANTS_IDLE_POLL
bool
config ARCH_HAS_CC_PLATFORM
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bc628a3165eb..7c963f7c10e4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -107,7 +107,7 @@ config ARM64
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
- select ARCH_HAS_CPU_RELAX
+ select ARCH_WANTS_IDLE_POLL
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8c4312133832..90f5d16be8c0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -73,7 +73,7 @@ config X86
select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CPU_PASID if IOMMU_SVA
- select ARCH_HAS_CPU_RELAX
+ select ARCH_WANTS_IDLE_POLL
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 55437f5e0c3a..6a0a1f16a5c3 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -36,7 +36,7 @@
#include <asm/cpu.h>
#endif
-#define ACPI_IDLE_STATE_START (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX) ? 1 : 0)
+#define ACPI_IDLE_STATE_START (IS_ENABLED(CONFIG_ARCH_WANTS_IDLE_POLL) ? 1 : 0)
static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0400);
@@ -787,7 +787,7 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr)
if (max_cstate == 0)
max_cstate = 1;
- if (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX)) {
+ if (IS_ENABLED(CONFIG_ARCH_WANTS_IDLE_POLL)) {
cpuidle_poll_state_init(drv);
count = 1;
} else {
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index d103342b7cfc..23f48d99f0f2 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -7,7 +7,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
obj-$(CONFIG_DT_IDLE_GENPD) += dt_idle_genpd.o
-obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o
+obj-$(CONFIG_ARCH_WANTS_IDLE_POLL) += poll_state.o
obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o
##################################################################################
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3183aeb7f5b4..53e55a91d55d 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -275,7 +275,7 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev,
}
#endif
-#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_WANTS_IDLE_POLL)
void cpuidle_poll_state_init(struct cpuidle_driver *drv);
#else
static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {}
--
1.8.3.1