2024-02-15 09:29:36

by Mihai Carabas

[permalink] [raw]
Subject: [PATCH v4] Enable haltpoll for arm64

This patchset enables the usage of haltpoll governer on arm64. This is
specifically interesting for KVM guests by reducing the IPC latencies.

Here are some benchmarks without/with haltpoll for a KVM guest:

a) without haltpoll:
perf bench sched pipe
# Running 'sched/pipe' benchmark:
# Executed 1000000 pipe operations between two processes

Total time: 8.138 [sec]

8.138094 usecs/op
122878 ops/sec

b) with haltpoll:
perf bench sched pipe
# Running 'sched/pipe' benchmark:
# Executed 1000000 pipe operations between two processes

Total time: 5.003 [sec]

5.003085 usecs/op
199876 ops/sec

v4 changes from v3:
- change 7/8 per Rafael input: drop the parens and use ret for the final check
- add 8/8 which renames the guard for building poll_state

v3 changes from v2:
- fix 1/7 per Petr Mladek - remove ARCH_HAS_CPU_RELAX from arch/x86/Kconfig
- add Ack-by from Rafael Wysocki on 2/7

v2 changes from v1:
- added patch 7 where we change cpu_relax with smp_cond_load_relaxed per PeterZ
(this improves by 50% at least the CPU cycles consumed in the tests above:
10,716,881,137 now vs 14,503,014,257 before)
- removed the ifdef from patch 1 per RafaelW


Joao Martins (6):
x86: Move ARCH_HAS_CPU_RELAX to arch
x86/kvm: Move haltpoll_want() to be arch defined
governors/haltpoll: Drop kvm_para_available() check
arm64: Select ARCH_HAS_CPU_RELAX
arm64: Define TIF_POLLING_NRFLAG
cpuidle-haltpoll: ARM64 support

Mihai Carabas (2):
cpuidle/poll_state: replace cpu_relax with smp_cond_load_relaxed
cpuidle: replace with HAS_CPU_RELAX with HAS_WANTS_IDLE_POLL

arch/Kconfig | 3 +++
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/thread_info.h | 6 ++++++
arch/x86/Kconfig | 4 +---
arch/x86/include/asm/cpuidle_haltpoll.h | 1 +
arch/x86/kernel/kvm.c | 10 ++++++++++
drivers/acpi/processor_idle.c | 4 ++--
drivers/cpuidle/Kconfig | 4 ++--
drivers/cpuidle/Makefile | 2 +-
drivers/cpuidle/cpuidle-haltpoll.c | 8 ++------
drivers/cpuidle/governors/haltpoll.c | 5 +----
drivers/cpuidle/poll_state.c | 15 ++++++++++-----
include/linux/cpuidle.h | 2 +-
include/linux/cpuidle_haltpoll.h | 5 +++++
14 files changed, 46 insertions(+), 24 deletions(-)

--
1.8.3.1



2024-02-15 09:29:42

by Mihai Carabas

[permalink] [raw]
Subject: [PATCH v4 5/8] arm64: Define TIF_POLLING_NRFLAG

From: Joao Martins <[email protected]>

The default idle method for arm64 is WFI and it therefore
unconditionally requires the reschedule interrupt when idle.

Commit 842514849a61 ("arm64: Remove TIF_POLLING_NRFLAG") had
reverted it because WFI was the only idle method. ARM64 support
for haltpoll means that poll_idle() polls for TIF_POLLING_NRFLAG,
so define on arm64 *only if* haltpoll is built, using the same bit.

Signed-off-by: Joao Martins <[email protected]>
Signed-off-by: Mihai Carabas <[email protected]>
---
arch/arm64/include/asm/thread_info.h | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e72a3bf9e563..72273a2168fa 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -69,6 +69,9 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
#define TIF_SECCOMP 11 /* syscall secure computing */
#define TIF_SYSCALL_EMU 12 /* syscall emulation active */
+#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE) || IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE_MODULE)
+#define TIF_POLLING_NRFLAG 16 /* poll_idle() polls TIF_NEED_RESCHED */
+#endif
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
@@ -91,6 +94,9 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE) || IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE_MODULE)
+#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
+#endif
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
--
1.8.3.1


2024-02-15 09:37:08

by Mihai Carabas

[permalink] [raw]
Subject: [PATCH v4 8/8] cpuidle: replace with HAS_CPU_RELAX with HAS_WANTS_IDLE_POLL

Replace ARCH_HAS_CPU_RELAX with ARCH_WANTS_IDLE_POLL for clarity as it controls
the building of poll_state.

Suggested-by: Will Deacon <[email protected]>
Signed-off-by: Ankur arora <[email protected]>
Signed-off-by: Mihai Carabas <[email protected]>
---
arch/Kconfig | 2 +-
arch/arm64/Kconfig | 2 +-
arch/x86/Kconfig | 2 +-
drivers/acpi/processor_idle.c | 4 ++--
drivers/cpuidle/Makefile | 2 +-
include/linux/cpuidle.h | 2 +-
6 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 5b2e8a88853c..e7659a3a7d58 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1363,7 +1363,7 @@ config RELR
config ARCH_HAS_MEM_ENCRYPT
bool

-config ARCH_HAS_CPU_RELAX
+config ARCH_WANTS_IDLE_POLL
bool

config ARCH_HAS_CC_PLATFORM
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bc628a3165eb..7c963f7c10e4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -107,7 +107,7 @@ config ARM64
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_NO_INSTR
select ARCH_WANTS_THP_SWAP if ARM64_4K_PAGES
- select ARCH_HAS_CPU_RELAX
+ select ARCH_WANTS_IDLE_POLL
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARM_AMBA
select ARM_ARCH_TIMER
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8c4312133832..90f5d16be8c0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -73,7 +73,7 @@ config X86
select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CPU_PASID if IOMMU_SVA
- select ARCH_HAS_CPU_RELAX
+ select ARCH_WANTS_IDLE_POLL
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 55437f5e0c3a..6a0a1f16a5c3 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -36,7 +36,7 @@
#include <asm/cpu.h>
#endif

-#define ACPI_IDLE_STATE_START (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX) ? 1 : 0)
+#define ACPI_IDLE_STATE_START (IS_ENABLED(CONFIG_ARCH_WANTS_IDLE_POLL) ? 1 : 0)

static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
module_param(max_cstate, uint, 0400);
@@ -787,7 +787,7 @@ static int acpi_processor_setup_cstates(struct acpi_processor *pr)
if (max_cstate == 0)
max_cstate = 1;

- if (IS_ENABLED(CONFIG_ARCH_HAS_CPU_RELAX)) {
+ if (IS_ENABLED(CONFIG_ARCH_WANTS_IDLE_POLL)) {
cpuidle_poll_state_init(drv);
count = 1;
} else {
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index d103342b7cfc..23f48d99f0f2 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -7,7 +7,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
obj-$(CONFIG_DT_IDLE_GENPD) += dt_idle_genpd.o
-obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o
+obj-$(CONFIG_ARCH_WANTS_IDLE_POLL) += poll_state.o
obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o

##################################################################################
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 3183aeb7f5b4..53e55a91d55d 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -275,7 +275,7 @@ static inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev,
}
#endif

-#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_WANTS_IDLE_POLL)
void cpuidle_poll_state_init(struct cpuidle_driver *drv);
#else
static inline void cpuidle_poll_state_init(struct cpuidle_driver *drv) {}
--
1.8.3.1