2022-06-08 15:25:44

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 14/36] cpuidle: Fix rcu_idle_*() usage

The whole disable-RCU, enable-IRQS dance is very intricate since
changing IRQ state is traced, which depends on RCU.

Add two helpers for the cpuidle case that mirror the entry code.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/arm/mach-imx/cpuidle-imx6q.c | 4 +--
arch/arm/mach-imx/cpuidle-imx6sx.c | 4 +--
arch/arm/mach-omap2/cpuidle34xx.c | 4 +--
arch/arm/mach-omap2/cpuidle44xx.c | 8 +++---
drivers/acpi/processor_idle.c | 18 ++++++++------
drivers/cpuidle/cpuidle-big_little.c | 4 +--
drivers/cpuidle/cpuidle-mvebu-v7.c | 4 +--
drivers/cpuidle/cpuidle-psci.c | 4 +--
drivers/cpuidle/cpuidle-riscv-sbi.c | 4 +--
drivers/cpuidle/cpuidle-tegra.c | 8 +++---
drivers/cpuidle/cpuidle.c | 11 ++++----
include/linux/cpuidle.h | 37 +++++++++++++++++++++++++---
kernel/sched/idle.c | 45 ++++++++++-------------------------
kernel/time/tick-broadcast.c | 6 +++-
14 files changed, 90 insertions(+), 71 deletions(-)

--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -24,9 +24,9 @@ static int imx6q_enter_wait(struct cpuid
imx6_set_lpm(WAIT_UNCLOCKED);
raw_spin_unlock(&cpuidle_lock);

- rcu_idle_enter();
+ cpuidle_rcu_enter();
cpu_do_idle();
- rcu_idle_exit();
+ cpuidle_rcu_exit();

raw_spin_lock(&cpuidle_lock);
if (num_idle_cpus-- == num_online_cpus())
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -47,9 +47,9 @@ static int imx6sx_enter_wait(struct cpui
cpu_pm_enter();
cpu_cluster_pm_enter();

- rcu_idle_enter();
+ cpuidle_rcu_enter();
cpu_suspend(0, imx6sx_idle_finish);
- rcu_idle_exit();
+ cpuidle_rcu_exit();

cpu_cluster_pm_exit();
cpu_pm_exit();
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -133,9 +133,9 @@ static int omap3_enter_idle(struct cpuid
}

/* Execute ARM wfi */
- rcu_idle_enter();
+ cpuidle_rcu_enter();
omap_sram_idle();
- rcu_idle_exit();
+ cpuidle_rcu_exit();

/*
* Call idle CPU PM enter notifier chain to restore
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -105,9 +105,9 @@ static int omap_enter_idle_smp(struct cp
}
raw_spin_unlock_irqrestore(&mpu_lock, flag);

- rcu_idle_enter();
+ cpuidle_rcu_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
- rcu_idle_exit();
+ cpuidle_rcu_exit();

raw_spin_lock_irqsave(&mpu_lock, flag);
if (cx->mpu_state_vote == num_online_cpus())
@@ -186,10 +186,10 @@ static int omap_enter_idle_coupled(struc
}
}

- rcu_idle_enter();
+ cpuidle_rcu_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
cpu_done[dev->cpu] = true;
- rcu_idle_exit();
+ cpuidle_rcu_exit();

/* Wakeup CPU1 only if it is not offlined */
if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -607,7 +607,7 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
* @cx: Target state context
* @index: index of target state
*/
-static int acpi_idle_enter_bm(struct cpuidle_driver *drv,
+static noinstr int acpi_idle_enter_bm(struct cpuidle_driver *drv,
struct acpi_processor *pr,
struct acpi_processor_cx *cx,
int index)
@@ -626,6 +626,8 @@ static int acpi_idle_enter_bm(struct cpu
*/
bool dis_bm = pr->flags.bm_control;

+ instrumentation_begin();
+
/* If we can skip BM, demote to a safe state. */
if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
dis_bm = false;
@@ -647,11 +649,11 @@ static int acpi_idle_enter_bm(struct cpu
raw_spin_unlock(&c3_lock);
}

- rcu_idle_enter();
+ cpuidle_rcu_enter();

acpi_idle_do_entry(cx);

- rcu_idle_exit();
+ cpuidle_rcu_exit();

/* Re-enable bus master arbitration */
if (dis_bm) {
@@ -661,11 +663,13 @@ static int acpi_idle_enter_bm(struct cpu
raw_spin_unlock(&c3_lock);
}

+ instrumentation_end();
+
return index;
}

-static int acpi_idle_enter(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
+static noinstr int acpi_idle_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
{
struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
struct acpi_processor *pr;
@@ -693,8 +697,8 @@ static int acpi_idle_enter(struct cpuidl
return index;
}

-static int acpi_idle_enter_s2idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
+static noinstr int acpi_idle_enter_s2idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
{
struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);

--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -126,13 +126,13 @@ static int bl_enter_powerdown(struct cpu
struct cpuidle_driver *drv, int idx)
{
cpu_pm_enter();
- rcu_idle_enter();
+ cpuidle_rcu_enter();

cpu_suspend(0, bl_powerdown_finisher);

/* signals the MCPM core that CPU is out of low power state */
mcpm_cpu_powered_up();
- rcu_idle_exit();
+ cpuidle_rcu_exit();

cpu_pm_exit();

--- a/drivers/cpuidle/cpuidle-mvebu-v7.c
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -36,9 +36,9 @@ static int mvebu_v7_enter_idle(struct cp
if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
deepidle = true;

- rcu_idle_enter();
+ cpuidle_rcu_enter();
ret = mvebu_v7_cpu_suspend(deepidle);
- rcu_idle_exit();
+ cpuidle_rcu_exit();

cpu_pm_exit();

--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -74,7 +74,7 @@ static int __psci_enter_domain_idle_stat
else
pm_runtime_put_sync_suspend(pd_dev);

- rcu_idle_enter();
+ cpuidle_rcu_enter();

state = psci_get_domain_state();
if (!state)
@@ -82,7 +82,7 @@ static int __psci_enter_domain_idle_stat

ret = psci_cpu_suspend_enter(state) ? -1 : idx;

- rcu_idle_exit();
+ cpuidle_rcu_exit();

if (s2idle)
dev_pm_genpd_resume(pd_dev);
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -121,7 +121,7 @@ static int __sbi_enter_domain_idle_state
else
pm_runtime_put_sync_suspend(pd_dev);

- rcu_idle_enter();
+ cpuidle_rcu_enter();

if (sbi_is_domain_state_available())
state = sbi_get_domain_state();
@@ -130,7 +130,7 @@ static int __sbi_enter_domain_idle_state

ret = sbi_suspend(state) ? -1 : idx;

- rcu_idle_exit();
+ cpuidle_rcu_exit();

if (s2idle)
dev_pm_genpd_resume(pd_dev);
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -183,7 +183,7 @@ static int tegra_cpuidle_state_enter(str
tegra_pm_set_cpu_in_lp2();
cpu_pm_enter();

- rcu_idle_enter();
+ cpuidle_rcu_enter();

switch (index) {
case TEGRA_C7:
@@ -199,7 +199,7 @@ static int tegra_cpuidle_state_enter(str
break;
}

- rcu_idle_exit();
+ cpuidle_rcu_exit();

cpu_pm_exit();
tegra_pm_clear_cpu_in_lp2();
@@ -240,10 +240,10 @@ static int tegra_cpuidle_enter(struct cp

if (index == TEGRA_C1) {
if (do_rcu)
- rcu_idle_enter();
+ cpuidle_rcu_enter();
ret = arm_cpuidle_simple_enter(dev, drv, index);
if (do_rcu)
- rcu_idle_exit();
+ cpuidle_rcu_exit();
} else
ret = tegra_cpuidle_state_enter(dev, index, cpu);

--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -13,6 +13,7 @@
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
@@ -150,12 +151,12 @@ static void enter_s2idle_proper(struct c
*/
stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_enter();
+ cpuidle_rcu_enter();
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
+ raw_local_irq_disable();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_exit();
+ cpuidle_rcu_exit();
tick_unfreeze();
start_critical_timings();

@@ -233,14 +234,14 @@ int cpuidle_enter_state(struct cpuidle_d

stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_enter();
+ cpuidle_rcu_enter();

entered_state = target_state->enter(dev, drv, index);
if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
raw_local_irq_disable();

if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- rcu_idle_exit();
+ cpuidle_rcu_exit();
start_critical_timings();

sched_clock_idle_wakeup_event();
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -115,6 +115,35 @@ struct cpuidle_device {
DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);

+static __always_inline void cpuidle_rcu_enter(void)
+{
+ lockdep_assert_irqs_disabled();
+ /*
+ * Idle is allowed to (temporary) enable IRQs. It
+ * will return with IRQs disabled.
+ *
+ * Trace IRQs enable here, then switch off RCU, and have
+ * arch_cpu_idle() use raw_local_irq_enable(). Note that
+ * rcu_idle_enter() relies on lockdep IRQ state, so switch that
+ * last -- this is very similar to the entry code.
+ */
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+ rcu_idle_enter();
+ lockdep_hardirqs_on(_THIS_IP_);
+}
+
+static __always_inline void cpuidle_rcu_exit(void)
+{
+ /*
+ * Carefully undo the above.
+ */
+ lockdep_hardirqs_off(_THIS_IP_);
+ rcu_idle_exit();
+ instrumentation_begin();
+}
+
/****************************
* CPUIDLE DRIVER INTERFACE *
****************************/
@@ -282,18 +311,18 @@ extern s64 cpuidle_governor_latency_req(
int __ret = 0; \
\
if (!idx) { \
- rcu_idle_enter(); \
+ cpuidle_rcu_enter(); \
cpu_do_idle(); \
- rcu_idle_exit(); \
+ cpuidle_rcu_exit(); \
return idx; \
} \
\
if (!is_retention) \
__ret = cpu_pm_enter(); \
if (!__ret) { \
- rcu_idle_enter(); \
+ cpuidle_rcu_enter(); \
__ret = low_level_idle_enter(state); \
- rcu_idle_exit(); \
+ cpuidle_rcu_exit(); \
if (!is_retention) \
cpu_pm_exit(); \
} \
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -51,18 +51,22 @@ __setup("hlt", cpu_idle_nopoll_setup);

static noinline int __cpuidle cpu_idle_poll(void)
{
+ instrumentation_begin();
trace_cpu_idle(0, smp_processor_id());
stop_critical_timings();
- rcu_idle_enter();
- local_irq_enable();
+ cpuidle_rcu_enter();

+ raw_local_irq_enable();
while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
+ raw_local_irq_disable();

- rcu_idle_exit();
+ cpuidle_rcu_exit();
start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ local_irq_enable();
+ instrumentation_end();

return 1;
}
@@ -85,44 +89,21 @@ void __weak arch_cpu_idle(void)
*/
void __cpuidle default_idle_call(void)
{
- if (current_clr_polling_and_test()) {
- local_irq_enable();
- } else {
-
+ instrumentation_begin();
+ if (!current_clr_polling_and_test()) {
trace_cpu_idle(1, smp_processor_id());
stop_critical_timings();

- /*
- * arch_cpu_idle() is supposed to enable IRQs, however
- * we can't do that because of RCU and tracing.
- *
- * Trace IRQs enable here, then switch off RCU, and have
- * arch_cpu_idle() use raw_local_irq_enable(). Note that
- * rcu_idle_enter() relies on lockdep IRQ state, so switch that
- * last -- this is very similar to the entry code.
- */
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- rcu_idle_enter();
- lockdep_hardirqs_on(_THIS_IP_);
-
+ cpuidle_rcu_enter();
arch_cpu_idle();
-
- /*
- * OK, so IRQs are enabled here, but RCU needs them disabled to
- * turn itself back on.. funny thing is that disabling IRQs
- * will cause tracing, which needs RCU. Jump through hoops to
- * make it 'work'.
- */
raw_local_irq_disable();
- lockdep_hardirqs_off(_THIS_IP_);
- rcu_idle_exit();
- lockdep_hardirqs_on(_THIS_IP_);
- raw_local_irq_enable();
+ cpuidle_rcu_exit();

start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
}
+ local_irq_enable();
+ instrumentation_end();
}

static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_onesh
* to avoid a deep idle transition as we are about to get the
* broadcast IPI right away.
*/
-int tick_check_broadcast_expired(void)
+noinstr int tick_check_broadcast_expired(void)
{
+#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
+ return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
+#else
return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+#endif
}

/*



2022-06-14 12:45:37

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH 14/36] cpuidle: Fix rcu_idle_*() usage

On Wed, Jun 08, 2022 at 04:27:37PM +0200, Peter Zijlstra wrote:
> --- a/kernel/time/tick-broadcast.c
> +++ b/kernel/time/tick-broadcast.c
> @@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_onesh
> * to avoid a deep idle transition as we are about to get the
> * broadcast IPI right away.
> */
> -int tick_check_broadcast_expired(void)
> +noinstr int tick_check_broadcast_expired(void)
> {
> +#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
> + return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
> +#else
> return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
> +#endif
> }

This is somewhat not-ideal. :/

Could we unconditionally do the arch_test_bit() variant, with a comment, or
does that not exist in some cases?

Thanks,
Mark.

2022-06-14 17:13:06

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 14/36] cpuidle: Fix rcu_idle_*() usage

On Tue, Jun 14, 2022 at 01:41:13PM +0100, Mark Rutland wrote:
> On Wed, Jun 08, 2022 at 04:27:37PM +0200, Peter Zijlstra wrote:
> > --- a/kernel/time/tick-broadcast.c
> > +++ b/kernel/time/tick-broadcast.c
> > @@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_onesh
> > * to avoid a deep idle transition as we are about to get the
> > * broadcast IPI right away.
> > */
> > -int tick_check_broadcast_expired(void)
> > +noinstr int tick_check_broadcast_expired(void)
> > {
> > +#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
> > + return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
> > +#else
> > return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
> > +#endif
> > }
>
> This is somewhat not-ideal. :/

I'll say.

> Could we unconditionally do the arch_test_bit() variant, with a comment, or
> does that not exist in some cases?

Loads of build errors ensued, which is how I ended up with this mess ...

2022-06-14 17:14:12

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH 14/36] cpuidle: Fix rcu_idle_*() usage

On Tue, Jun 14, 2022 at 06:40:53PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 14, 2022 at 01:41:13PM +0100, Mark Rutland wrote:
> > On Wed, Jun 08, 2022 at 04:27:37PM +0200, Peter Zijlstra wrote:
> > > --- a/kernel/time/tick-broadcast.c
> > > +++ b/kernel/time/tick-broadcast.c
> > > @@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_onesh
> > > * to avoid a deep idle transition as we are about to get the
> > > * broadcast IPI right away.
> > > */
> > > -int tick_check_broadcast_expired(void)
> > > +noinstr int tick_check_broadcast_expired(void)
> > > {
> > > +#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
> > > + return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
> > > +#else
> > > return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
> > > +#endif
> > > }
> >
> > This is somewhat not-ideal. :/
>
> I'll say.
>
> > Could we unconditionally do the arch_test_bit() variant, with a comment, or
> > does that not exist in some cases?
>
> Loads of build errors ensued, which is how I ended up with this mess ...

Yaey :(

I see the same is true for the thread flag manipulation too.

I'll take a look and see if we can layer things so that we can use the arch_*()
helpers and wrap those consistently so that we don't have to check the CPP
guard.

Ideally we'd have a a better language that allows us to make some
context-senstive decisions, then we could hide all this gunk in the lower
levels with somethin like:

if (!THIS_IS_A_NOINSTR_FUNCTION()) {
explicit_instrumentation(...);
}

... ho hum.

Mark.

2022-07-26 10:13:55

by Gautham R. Shenoy

[permalink] [raw]
Subject: Re: [PATCH 14/36] cpuidle: Fix rcu_idle_*() usage

On Wed, Jun 08, 2022 at 04:27:37PM +0200, Peter Zijlstra wrote:
> The whole disable-RCU, enable-IRQS dance is very intricate since
> changing IRQ state is traced, which depends on RCU.
>
> Add two helpers for the cpuidle case that mirror the entry code.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>

Reviewed-by: Gautham R. Shenoy <[email protected]>

> ---
> arch/arm/mach-imx/cpuidle-imx6q.c | 4 +--
> arch/arm/mach-imx/cpuidle-imx6sx.c | 4 +--
> arch/arm/mach-omap2/cpuidle34xx.c | 4 +--
> arch/arm/mach-omap2/cpuidle44xx.c | 8 +++---
> drivers/acpi/processor_idle.c | 18 ++++++++------
> drivers/cpuidle/cpuidle-big_little.c | 4 +--
> drivers/cpuidle/cpuidle-mvebu-v7.c | 4 +--
> drivers/cpuidle/cpuidle-psci.c | 4 +--
> drivers/cpuidle/cpuidle-riscv-sbi.c | 4 +--
> drivers/cpuidle/cpuidle-tegra.c | 8 +++---
> drivers/cpuidle/cpuidle.c | 11 ++++----
> include/linux/cpuidle.h | 37 +++++++++++++++++++++++++---
> kernel/sched/idle.c | 45 ++++++++++-------------------------
> kernel/time/tick-broadcast.c | 6 +++-
> 14 files changed, 90 insertions(+), 71 deletions(-)
>
> --- a/arch/arm/mach-imx/cpuidle-imx6q.c
> +++ b/arch/arm/mach-imx/cpuidle-imx6q.c
> @@ -24,9 +24,9 @@ static int imx6q_enter_wait(struct cpuid
> imx6_set_lpm(WAIT_UNCLOCKED);
> raw_spin_unlock(&cpuidle_lock);
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> cpu_do_idle();
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> raw_spin_lock(&cpuidle_lock);
> if (num_idle_cpus-- == num_online_cpus())
> --- a/arch/arm/mach-imx/cpuidle-imx6sx.c
> +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
> @@ -47,9 +47,9 @@ static int imx6sx_enter_wait(struct cpui
> cpu_pm_enter();
> cpu_cluster_pm_enter();
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> cpu_suspend(0, imx6sx_idle_finish);
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> cpu_cluster_pm_exit();
> cpu_pm_exit();
> --- a/arch/arm/mach-omap2/cpuidle34xx.c
> +++ b/arch/arm/mach-omap2/cpuidle34xx.c
> @@ -133,9 +133,9 @@ static int omap3_enter_idle(struct cpuid
> }
>
> /* Execute ARM wfi */
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> omap_sram_idle();
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> /*
> * Call idle CPU PM enter notifier chain to restore
> --- a/arch/arm/mach-omap2/cpuidle44xx.c
> +++ b/arch/arm/mach-omap2/cpuidle44xx.c
> @@ -105,9 +105,9 @@ static int omap_enter_idle_smp(struct cp
> }
> raw_spin_unlock_irqrestore(&mpu_lock, flag);
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> omap4_enter_lowpower(dev->cpu, cx->cpu_state);
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> raw_spin_lock_irqsave(&mpu_lock, flag);
> if (cx->mpu_state_vote == num_online_cpus())
> @@ -186,10 +186,10 @@ static int omap_enter_idle_coupled(struc
> }
> }
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> omap4_enter_lowpower(dev->cpu, cx->cpu_state);
> cpu_done[dev->cpu] = true;
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> /* Wakeup CPU1 only if it is not offlined */
> if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -607,7 +607,7 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
> * @cx: Target state context
> * @index: index of target state
> */
> -static int acpi_idle_enter_bm(struct cpuidle_driver *drv,
> +static noinstr int acpi_idle_enter_bm(struct cpuidle_driver *drv,
> struct acpi_processor *pr,
> struct acpi_processor_cx *cx,
> int index)
> @@ -626,6 +626,8 @@ static int acpi_idle_enter_bm(struct cpu
> */
> bool dis_bm = pr->flags.bm_control;
>
> + instrumentation_begin();
> +
> /* If we can skip BM, demote to a safe state. */
> if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
> dis_bm = false;
> @@ -647,11 +649,11 @@ static int acpi_idle_enter_bm(struct cpu
> raw_spin_unlock(&c3_lock);
> }
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
>
> acpi_idle_do_entry(cx);
>
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> /* Re-enable bus master arbitration */
> if (dis_bm) {
> @@ -661,11 +663,13 @@ static int acpi_idle_enter_bm(struct cpu
> raw_spin_unlock(&c3_lock);
> }
>
> + instrumentation_end();
> +
> return index;
> }
>
> -static int acpi_idle_enter(struct cpuidle_device *dev,
> - struct cpuidle_driver *drv, int index)
> +static noinstr int acpi_idle_enter(struct cpuidle_device *dev,
> + struct cpuidle_driver *drv, int index)
> {
> struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
> struct acpi_processor *pr;
> @@ -693,8 +697,8 @@ static int acpi_idle_enter(struct cpuidl
> return index;
> }
>
> -static int acpi_idle_enter_s2idle(struct cpuidle_device *dev,
> - struct cpuidle_driver *drv, int index)
> +static noinstr int acpi_idle_enter_s2idle(struct cpuidle_device *dev,
> + struct cpuidle_driver *drv, int index)
> {
> struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
>
> --- a/drivers/cpuidle/cpuidle-big_little.c
> +++ b/drivers/cpuidle/cpuidle-big_little.c
> @@ -126,13 +126,13 @@ static int bl_enter_powerdown(struct cpu
> struct cpuidle_driver *drv, int idx)
> {
> cpu_pm_enter();
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
>
> cpu_suspend(0, bl_powerdown_finisher);
>
> /* signals the MCPM core that CPU is out of low power state */
> mcpm_cpu_powered_up();
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> cpu_pm_exit();
>
> --- a/drivers/cpuidle/cpuidle-mvebu-v7.c
> +++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
> @@ -36,9 +36,9 @@ static int mvebu_v7_enter_idle(struct cp
> if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
> deepidle = true;
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> ret = mvebu_v7_cpu_suspend(deepidle);
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> cpu_pm_exit();
>
> --- a/drivers/cpuidle/cpuidle-psci.c
> +++ b/drivers/cpuidle/cpuidle-psci.c
> @@ -74,7 +74,7 @@ static int __psci_enter_domain_idle_stat
> else
> pm_runtime_put_sync_suspend(pd_dev);
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
>
> state = psci_get_domain_state();
> if (!state)
> @@ -82,7 +82,7 @@ static int __psci_enter_domain_idle_stat
>
> ret = psci_cpu_suspend_enter(state) ? -1 : idx;
>
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> if (s2idle)
> dev_pm_genpd_resume(pd_dev);
> --- a/drivers/cpuidle/cpuidle-riscv-sbi.c
> +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
> @@ -121,7 +121,7 @@ static int __sbi_enter_domain_idle_state
> else
> pm_runtime_put_sync_suspend(pd_dev);
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
>
> if (sbi_is_domain_state_available())
> state = sbi_get_domain_state();
> @@ -130,7 +130,7 @@ static int __sbi_enter_domain_idle_state
>
> ret = sbi_suspend(state) ? -1 : idx;
>
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> if (s2idle)
> dev_pm_genpd_resume(pd_dev);
> --- a/drivers/cpuidle/cpuidle-tegra.c
> +++ b/drivers/cpuidle/cpuidle-tegra.c
> @@ -183,7 +183,7 @@ static int tegra_cpuidle_state_enter(str
> tegra_pm_set_cpu_in_lp2();
> cpu_pm_enter();
>
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
>
> switch (index) {
> case TEGRA_C7:
> @@ -199,7 +199,7 @@ static int tegra_cpuidle_state_enter(str
> break;
> }
>
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
>
> cpu_pm_exit();
> tegra_pm_clear_cpu_in_lp2();
> @@ -240,10 +240,10 @@ static int tegra_cpuidle_enter(struct cp
>
> if (index == TEGRA_C1) {
> if (do_rcu)
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> ret = arm_cpuidle_simple_enter(dev, drv, index);
> if (do_rcu)
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
> } else
> ret = tegra_cpuidle_state_enter(dev, index, cpu);
>
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -13,6 +13,7 @@
> #include <linux/mutex.h>
> #include <linux/sched.h>
> #include <linux/sched/clock.h>
> +#include <linux/sched/idle.h>
> #include <linux/notifier.h>
> #include <linux/pm_qos.h>
> #include <linux/cpu.h>
> @@ -150,12 +151,12 @@ static void enter_s2idle_proper(struct c
> */
> stop_critical_timings();
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
> target_state->enter_s2idle(dev, drv, index);
> if (WARN_ON_ONCE(!irqs_disabled()))
> - local_irq_disable();
> + raw_local_irq_disable();
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
> tick_unfreeze();
> start_critical_timings();
>
> @@ -233,14 +234,14 @@ int cpuidle_enter_state(struct cpuidle_d
>
> stop_critical_timings();
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_enter();
> + cpuidle_rcu_enter();
>
> entered_state = target_state->enter(dev, drv, index);
> if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
> raw_local_irq_disable();
>
> if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
> start_critical_timings();
>
> sched_clock_idle_wakeup_event();
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -115,6 +115,35 @@ struct cpuidle_device {
> DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
> DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
>
> +static __always_inline void cpuidle_rcu_enter(void)
> +{
> + lockdep_assert_irqs_disabled();
> + /*
> + * Idle is allowed to (temporary) enable IRQs. It
> + * will return with IRQs disabled.
> + *
> + * Trace IRQs enable here, then switch off RCU, and have
> + * arch_cpu_idle() use raw_local_irq_enable(). Note that
> + * rcu_idle_enter() relies on lockdep IRQ state, so switch that
> + * last -- this is very similar to the entry code.
> + */
> + trace_hardirqs_on_prepare();
> + lockdep_hardirqs_on_prepare();
> + instrumentation_end();
> + rcu_idle_enter();
> + lockdep_hardirqs_on(_THIS_IP_);
> +}
> +
> +static __always_inline void cpuidle_rcu_exit(void)
> +{
> + /*
> + * Carefully undo the above.
> + */
> + lockdep_hardirqs_off(_THIS_IP_);
> + rcu_idle_exit();
> + instrumentation_begin();
> +}
> +
> /****************************
> * CPUIDLE DRIVER INTERFACE *
> ****************************/
> @@ -282,18 +311,18 @@ extern s64 cpuidle_governor_latency_req(
> int __ret = 0; \
> \
> if (!idx) { \
> - rcu_idle_enter(); \
> + cpuidle_rcu_enter(); \
> cpu_do_idle(); \
> - rcu_idle_exit(); \
> + cpuidle_rcu_exit(); \
> return idx; \
> } \
> \
> if (!is_retention) \
> __ret = cpu_pm_enter(); \
> if (!__ret) { \
> - rcu_idle_enter(); \
> + cpuidle_rcu_enter(); \
> __ret = low_level_idle_enter(state); \
> - rcu_idle_exit(); \
> + cpuidle_rcu_exit(); \
> if (!is_retention) \
> cpu_pm_exit(); \
> } \
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -51,18 +51,22 @@ __setup("hlt", cpu_idle_nopoll_setup);
>
> static noinline int __cpuidle cpu_idle_poll(void)
> {
> + instrumentation_begin();
> trace_cpu_idle(0, smp_processor_id());
> stop_critical_timings();
> - rcu_idle_enter();
> - local_irq_enable();
> + cpuidle_rcu_enter();
>
> + raw_local_irq_enable();
> while (!tif_need_resched() &&
> (cpu_idle_force_poll || tick_check_broadcast_expired()))
> cpu_relax();
> + raw_local_irq_disable();
>
> - rcu_idle_exit();
> + cpuidle_rcu_exit();
> start_critical_timings();
> trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
> + local_irq_enable();
> + instrumentation_end();
>
> return 1;
> }
> @@ -85,44 +89,21 @@ void __weak arch_cpu_idle(void)
> */
> void __cpuidle default_idle_call(void)
> {
> - if (current_clr_polling_and_test()) {
> - local_irq_enable();
> - } else {
> -
> + instrumentation_begin();
> + if (!current_clr_polling_and_test()) {
> trace_cpu_idle(1, smp_processor_id());
> stop_critical_timings();
>
> - /*
> - * arch_cpu_idle() is supposed to enable IRQs, however
> - * we can't do that because of RCU and tracing.
> - *
> - * Trace IRQs enable here, then switch off RCU, and have
> - * arch_cpu_idle() use raw_local_irq_enable(). Note that
> - * rcu_idle_enter() relies on lockdep IRQ state, so switch that
> - * last -- this is very similar to the entry code.
> - */
> - trace_hardirqs_on_prepare();
> - lockdep_hardirqs_on_prepare();
> - rcu_idle_enter();
> - lockdep_hardirqs_on(_THIS_IP_);
> -
> + cpuidle_rcu_enter();
> arch_cpu_idle();
> -
> - /*
> - * OK, so IRQs are enabled here, but RCU needs them disabled to
> - * turn itself back on.. funny thing is that disabling IRQs
> - * will cause tracing, which needs RCU. Jump through hoops to
> - * make it 'work'.
> - */
> raw_local_irq_disable();
> - lockdep_hardirqs_off(_THIS_IP_);
> - rcu_idle_exit();
> - lockdep_hardirqs_on(_THIS_IP_);
> - raw_local_irq_enable();
> + cpuidle_rcu_exit();
>
> start_critical_timings();
> trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
> }
> + local_irq_enable();
> + instrumentation_end();
> }
>
> static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
> --- a/kernel/time/tick-broadcast.c
> +++ b/kernel/time/tick-broadcast.c
> @@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_onesh
> * to avoid a deep idle transition as we are about to get the
> * broadcast IPI right away.
> */
> -int tick_check_broadcast_expired(void)
> +noinstr int tick_check_broadcast_expired(void)
> {
> +#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
> + return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
> +#else
> return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
> +#endif
> }
>
> /*
>
>