2020-05-05 14:16:42

by Thomas Gleixner

Subject: [patch V4 part 1 25/36] rcu/tree: Mark the idle relevant functions noinstr

These functions are invoked from context tracking and other places in the
low level entry code. Move them into the .noinstr.text section to exclude
them from instrumentation.

Mark the places which are safe to invoke traceable functions with
instr_begin/end() so objtool won't complain.
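
In short, the pattern the hunks below apply looks like this (a sketch only;
example_idle_enter() and trace_example_event() are made-up names for
illustration):

noinstr void example_idle_enter(void)
{
	/*
	 * noinstr places this function into .noinstr.text, so by default
	 * nothing in here may be traced or instrumented.
	 */
	instr_begin();
	/* State is consistent at this point, traceable helpers are safe. */
	trace_example_event();
	instr_end();
	/* Anything after instr_end() must again be instrumentation free. */
}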

Signed-off-by: Thomas Gleixner <[email protected]>
---
kernel/rcu/tree.c | 85 +++++++++++++++++++++++++----------------------
kernel/rcu/tree_plugin.h | 4 +-
kernel/rcu/update.c | 7 +--
3 files changed, 52 insertions(+), 44 deletions(-)

--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -75,9 +75,6 @@
*/
#define RCU_DYNTICK_CTRL_MASK 0x1
#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
-#ifndef rcu_eqs_special_exit
-#define rcu_eqs_special_exit() do { } while (0)
-#endif

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.dynticks_nesting = 1,
@@ -229,7 +226,7 @@ void rcu_softirq_qs(void)
* RCU is watching prior to the call to this function and is no longer
* watching upon return.
*/
-static void rcu_dynticks_eqs_enter(void)
+static noinstr void rcu_dynticks_eqs_enter(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
@@ -253,7 +250,7 @@ static void rcu_dynticks_eqs_enter(void)
* called from an extended quiescent state, that is, RCU is not watching
* prior to the call to this function and is watching upon return.
*/
-static void rcu_dynticks_eqs_exit(void)
+static noinstr void rcu_dynticks_eqs_exit(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
@@ -270,8 +267,6 @@ static void rcu_dynticks_eqs_exit(void)
if (seq & RCU_DYNTICK_CTRL_MASK) {
atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
smp_mb__after_atomic(); /* _exit after clearing mask. */
- /* Prefer duplicate flushes to losing a flush. */
- rcu_eqs_special_exit();
}
}

@@ -299,7 +294,7 @@ static void rcu_dynticks_eqs_online(void
*
* No ordering, as we are sampling CPU-local information.
*/
-static bool rcu_dynticks_curr_cpu_in_eqs(void)
+static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

@@ -566,7 +561,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
-static void rcu_eqs_enter(bool user)
+static noinstr void rcu_eqs_enter(bool user)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

@@ -581,12 +576,14 @@ static void rcu_eqs_enter(bool user)
}

lockdep_assert_irqs_disabled();
+ instr_begin();
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rdp = this_cpu_ptr(&rcu_data);
do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
+ instr_end();
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
// RCU is watching here ...
rcu_dynticks_eqs_enter();
@@ -623,7 +620,7 @@ void rcu_idle_enter(void)
* If you add or remove a call to rcu_user_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_user_enter(void)
+noinstr void rcu_user_enter(void)
{
lockdep_assert_irqs_disabled();
rcu_eqs_enter(true);
@@ -656,19 +653,23 @@ static __always_inline void rcu_nmi_exit
* leave it in non-RCU-idle state.
*/
if (rdp->dynticks_nmi_nesting != 1) {
+ instr_begin();
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
rdp->dynticks_nmi_nesting - 2);
+ instr_end();
return;
}

+ instr_begin();
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */

if (irq)
rcu_prepare_for_idle();
+ instr_end();

// RCU is watching here ...
rcu_dynticks_eqs_enter();
@@ -684,7 +685,7 @@ static __always_inline void rcu_nmi_exit
* If you add or remove a call to rcu_nmi_exit(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_nmi_exit(void)
+void noinstr rcu_nmi_exit(void)
{
rcu_nmi_exit_common(false);
}
@@ -708,7 +709,7 @@ void rcu_nmi_exit(void)
* If you add or remove a call to rcu_irq_exit(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_irq_exit(void)
+void noinstr rcu_irq_exit(void)
{
lockdep_assert_irqs_disabled();
rcu_nmi_exit_common(true);
@@ -737,7 +738,7 @@ void rcu_irq_exit_irqson(void)
* allow for the possibility of usermode upcalls messing up our count of
* interrupt nesting level during the busy period that is just now starting.
*/
-static void rcu_eqs_exit(bool user)
+static void noinstr rcu_eqs_exit(bool user)
{
struct rcu_data *rdp;
long oldval;
@@ -755,12 +756,14 @@ static void rcu_eqs_exit(bool user)
// RCU is not watching here ...
rcu_dynticks_eqs_exit();
// ... but is watching here.
+ instr_begin();
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
WRITE_ONCE(rdp->dynticks_nesting, 1);
WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
+ instr_end();
}

/**
@@ -791,7 +794,7 @@ void rcu_idle_exit(void)
* If you add or remove a call to rcu_user_exit(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_user_exit(void)
+void noinstr rcu_user_exit(void)
{
rcu_eqs_exit(1);
}
@@ -839,28 +842,35 @@ static __always_inline void rcu_nmi_ente
rcu_cleanup_after_idle();

incby = 1;
- } else if (irq && tick_nohz_full_cpu(rdp->cpu) &&
- rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
- READ_ONCE(rdp->rcu_urgent_qs) &&
- !READ_ONCE(rdp->rcu_forced_tick)) {
- // We get here only if we had already exited the extended
- // quiescent state and this was an interrupt (not an NMI).
- // Therefore, (1) RCU is already watching and (2) The fact
- // that we are in an interrupt handler and that the rcu_node
- // lock is an irq-disabled lock prevents self-deadlock.
- // So we can safely recheck under the lock.
- raw_spin_lock_rcu_node(rdp->mynode);
- if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
- // A nohz_full CPU is in the kernel and RCU
- // needs a quiescent state. Turn on the tick!
- WRITE_ONCE(rdp->rcu_forced_tick, true);
- tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ } else if (irq) {
+ instr_begin();
+ if (tick_nohz_full_cpu(rdp->cpu) &&
+ rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
+ READ_ONCE(rdp->rcu_urgent_qs) &&
+ !READ_ONCE(rdp->rcu_forced_tick)) {
+ // We get here only if we had already exited the
+ // extended quiescent state and this was an
+ // interrupt (not an NMI). Therefore, (1) RCU is
+ // already watching and (2) The fact that we are in
+ // an interrupt handler and that the rcu_node lock
+ // is an irq-disabled lock prevents self-deadlock.
+ // So we can safely recheck under the lock.
+ raw_spin_lock_rcu_node(rdp->mynode);
+ if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+ // A nohz_full CPU is in the kernel and RCU
+ // needs a quiescent state. Turn on the tick!
+ WRITE_ONCE(rdp->rcu_forced_tick, true);
+ tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ }
+ raw_spin_unlock_rcu_node(rdp->mynode);
}
- raw_spin_unlock_rcu_node(rdp->mynode);
+ instr_end();
}
+ instr_begin();
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdp->dynticks_nmi_nesting,
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
+ instr_end();
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
rdp->dynticks_nmi_nesting + incby);
barrier();
@@ -869,11 +879,10 @@ static __always_inline void rcu_nmi_ente
/**
* rcu_nmi_enter - inform RCU of entry to NMI context
*/
-void rcu_nmi_enter(void)
+noinstr void rcu_nmi_enter(void)
{
rcu_nmi_enter_common(false);
}
-NOKPROBE_SYMBOL(rcu_nmi_enter);

/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -897,7 +906,7 @@ NOKPROBE_SYMBOL(rcu_nmi_enter);
* If you add or remove a call to rcu_irq_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_irq_enter(void)
+noinstr void rcu_irq_enter(void)
{
lockdep_assert_irqs_disabled();
rcu_nmi_enter_common(true);
@@ -942,7 +951,7 @@ static void rcu_disable_urgency_upon_qs(
* if the current CPU is not in its idle loop or is in an interrupt or
* NMI handler, return true.
*/
-bool notrace rcu_is_watching(void)
+noinstr bool rcu_is_watching(void)
{
bool ret;

@@ -986,7 +995,7 @@ void rcu_request_urgent_qs_task(struct t
* RCU on an offline processor during initial boot, hence the check for
* rcu_scheduler_fully_active.
*/
-bool rcu_lockdep_current_cpu_online(void)
+noinstr bool rcu_lockdep_current_cpu_online(void)
{
struct rcu_data *rdp;
struct rcu_node *rnp;
@@ -994,12 +1003,12 @@ bool rcu_lockdep_current_cpu_online(void

if (in_nmi() || !rcu_scheduler_fully_active)
return true;
- preempt_disable();
+ preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
ret = true;
- preempt_enable();
+ preempt_enable_notrace();
return ret;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2553,7 +2553,7 @@ static void rcu_bind_gp_kthread(void)
}

/* Record the current task on dyntick-idle entry. */
-static void rcu_dynticks_task_enter(void)
+static void noinstr rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
@@ -2561,7 +2561,7 @@ static void rcu_dynticks_task_enter(void
}

/* Record no current task on dyntick-idle exit. */
-static void rcu_dynticks_task_exit(void)
+static void noinstr rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -95,7 +95,7 @@ module_param(rcu_normal_after_boot, int,
* Similarly, we avoid claiming an RCU read lock held if the current
* CPU is offline.
*/
-static bool rcu_read_lock_held_common(bool *ret)
+static noinstr bool rcu_read_lock_held_common(bool *ret)
{
if (!debug_lockdep_rcu_enabled()) {
*ret = 1;
@@ -112,7 +112,7 @@ static bool rcu_read_lock_held_common(bo
return false;
}

-int rcu_read_lock_sched_held(void)
+noinstr int rcu_read_lock_sched_held(void)
{
bool ret;

@@ -270,13 +270,12 @@ struct lockdep_map rcu_callback_map =
STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
EXPORT_SYMBOL_GPL(rcu_callback_map);

-int notrace debug_lockdep_rcu_enabled(void)
+noinstr int notrace debug_lockdep_rcu_enabled(void)
{
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
-NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);

/**
* rcu_read_lock_held() - might we be in RCU read-side critical section?


2020-05-05 18:09:49

by Paul E. McKenney

Subject: Re: [patch V4 part 1 25/36] rcu/tree: Mark the idle relevant functions noinstr

On Tue, May 05, 2020 at 03:16:27PM +0200, Thomas Gleixner wrote:
> These functions are invoked from context tracking and other places in the
> low level entry code. Move them into the .noinstr.text section to exclude
> them from instrumentation.
>
> Mark the places which are safe to invoke traceable functions with
> instr_begin/end() so objtool won't complain.
>
> Signed-off-by: Thomas Gleixner <[email protected]>

Acked-by: Paul E. McKenney <[email protected]>

> ---
> kernel/rcu/tree.c | 85 +++++++++++++++++++++++++----------------------
> kernel/rcu/tree_plugin.h | 4 +-
> kernel/rcu/update.c | 7 +--
> 3 files changed, 52 insertions(+), 44 deletions(-)
>
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -75,9 +75,6 @@
> */
> #define RCU_DYNTICK_CTRL_MASK 0x1
> #define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
> -#ifndef rcu_eqs_special_exit
> -#define rcu_eqs_special_exit() do { } while (0)
> -#endif

Joel has a patch series doing a more thorough removal of this function,
but sometimes it is necessary to remove the function twice, just to
be sure. ;-)

> static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
> .dynticks_nesting = 1,
> @@ -229,7 +226,7 @@ void rcu_softirq_qs(void)
> * RCU is watching prior to the call to this function and is no longer
> * watching upon return.
> */
> -static void rcu_dynticks_eqs_enter(void)
> +static noinstr void rcu_dynticks_eqs_enter(void)
> {
> struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> int seq;
> @@ -253,7 +250,7 @@ static void rcu_dynticks_eqs_enter(void)
> * called from an extended quiescent state, that is, RCU is not watching
> * prior to the call to this function and is watching upon return.
> */
> -static void rcu_dynticks_eqs_exit(void)
> +static noinstr void rcu_dynticks_eqs_exit(void)
> {
> struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
> int seq;
> @@ -270,8 +267,6 @@ static void rcu_dynticks_eqs_exit(void)
> if (seq & RCU_DYNTICK_CTRL_MASK) {
> atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
> smp_mb__after_atomic(); /* _exit after clearing mask. */
> - /* Prefer duplicate flushes to losing a flush. */
> - rcu_eqs_special_exit();
> }
> }
>
> @@ -299,7 +294,7 @@ static void rcu_dynticks_eqs_online(void
> *
> * No ordering, as we are sampling CPU-local information.
> */
> -static bool rcu_dynticks_curr_cpu_in_eqs(void)
> +static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
> {
> struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
>
> @@ -566,7 +561,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data
> * the possibility of usermode upcalls having messed up our count
> * of interrupt nesting level during the prior busy period.
> */
> -static void rcu_eqs_enter(bool user)
> +static noinstr void rcu_eqs_enter(bool user)
> {
> struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
>
> @@ -581,12 +576,14 @@ static void rcu_eqs_enter(bool user)
> }
>
> lockdep_assert_irqs_disabled();
> + instr_begin();
> trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
> WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
> rdp = this_cpu_ptr(&rcu_data);
> do_nocb_deferred_wakeup(rdp);
> rcu_prepare_for_idle();
> rcu_preempt_deferred_qs(current);
> + instr_end();
> WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
> // RCU is watching here ...
> rcu_dynticks_eqs_enter();
> @@ -623,7 +620,7 @@ void rcu_idle_enter(void)
> * If you add or remove a call to rcu_user_enter(), be sure to test with
> * CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_user_enter(void)
> +noinstr void rcu_user_enter(void)
> {
> lockdep_assert_irqs_disabled();
> rcu_eqs_enter(true);
> @@ -656,19 +653,23 @@ static __always_inline void rcu_nmi_exit
> * leave it in non-RCU-idle state.
> */
> if (rdp->dynticks_nmi_nesting != 1) {
> + instr_begin();
> trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
> atomic_read(&rdp->dynticks));
> WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
> rdp->dynticks_nmi_nesting - 2);
> + instr_end();
> return;
> }
>
> + instr_begin();
> /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
> trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
> WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
>
> if (irq)
> rcu_prepare_for_idle();
> + instr_end();
>
> // RCU is watching here ...
> rcu_dynticks_eqs_enter();
> @@ -684,7 +685,7 @@ static __always_inline void rcu_nmi_exit
> * If you add or remove a call to rcu_nmi_exit(), be sure to test
> * with CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_nmi_exit(void)
> +void noinstr rcu_nmi_exit(void)
> {
> rcu_nmi_exit_common(false);
> }
> @@ -708,7 +709,7 @@ void rcu_nmi_exit(void)
> * If you add or remove a call to rcu_irq_exit(), be sure to test with
> * CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_irq_exit(void)
> +void noinstr rcu_irq_exit(void)
> {
> lockdep_assert_irqs_disabled();
> rcu_nmi_exit_common(true);
> @@ -737,7 +738,7 @@ void rcu_irq_exit_irqson(void)
> * allow for the possibility of usermode upcalls messing up our count of
> * interrupt nesting level during the busy period that is just now starting.
> */
> -static void rcu_eqs_exit(bool user)
> +static void noinstr rcu_eqs_exit(bool user)
> {
> struct rcu_data *rdp;
> long oldval;
> @@ -755,12 +756,14 @@ static void rcu_eqs_exit(bool user)
> // RCU is not watching here ...
> rcu_dynticks_eqs_exit();
> // ... but is watching here.
> + instr_begin();
> rcu_cleanup_after_idle();
> trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
> WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
> WRITE_ONCE(rdp->dynticks_nesting, 1);
> WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
> WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
> + instr_end();
> }
>
> /**
> @@ -791,7 +794,7 @@ void rcu_idle_exit(void)
> * If you add or remove a call to rcu_user_exit(), be sure to test with
> * CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_user_exit(void)
> +void noinstr rcu_user_exit(void)
> {
> rcu_eqs_exit(1);
> }
> @@ -839,28 +842,35 @@ static __always_inline void rcu_nmi_ente
> rcu_cleanup_after_idle();
>
> incby = 1;
> - } else if (irq && tick_nohz_full_cpu(rdp->cpu) &&
> - rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
> - READ_ONCE(rdp->rcu_urgent_qs) &&
> - !READ_ONCE(rdp->rcu_forced_tick)) {
> - // We get here only if we had already exited the extended
> - // quiescent state and this was an interrupt (not an NMI).
> - // Therefore, (1) RCU is already watching and (2) The fact
> - // that we are in an interrupt handler and that the rcu_node
> - // lock is an irq-disabled lock prevents self-deadlock.
> - // So we can safely recheck under the lock.
> - raw_spin_lock_rcu_node(rdp->mynode);
> - if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
> - // A nohz_full CPU is in the kernel and RCU
> - // needs a quiescent state. Turn on the tick!
> - WRITE_ONCE(rdp->rcu_forced_tick, true);
> - tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
> + } else if (irq) {
> + instr_begin();
> + if (tick_nohz_full_cpu(rdp->cpu) &&
> + rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
> + READ_ONCE(rdp->rcu_urgent_qs) &&
> + !READ_ONCE(rdp->rcu_forced_tick)) {
> + // We get here only if we had already exited the
> + // extended quiescent state and this was an
> + // interrupt (not an NMI). Therefore, (1) RCU is
> + // already watching and (2) The fact that we are in
> + // an interrupt handler and that the rcu_node lock
> + // is an irq-disabled lock prevents self-deadlock.
> + // So we can safely recheck under the lock.
> + raw_spin_lock_rcu_node(rdp->mynode);
> + if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
> + // A nohz_full CPU is in the kernel and RCU
> + // needs a quiescent state. Turn on the tick!
> + WRITE_ONCE(rdp->rcu_forced_tick, true);
> + tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
> + }
> + raw_spin_unlock_rcu_node(rdp->mynode);
> }
> - raw_spin_unlock_rcu_node(rdp->mynode);
> + instr_end();
> }
> + instr_begin();
> trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
> rdp->dynticks_nmi_nesting,
> rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
> + instr_end();
> WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
> rdp->dynticks_nmi_nesting + incby);
> barrier();
> @@ -869,11 +879,10 @@ static __always_inline void rcu_nmi_ente
> /**
> * rcu_nmi_enter - inform RCU of entry to NMI context
> */
> -void rcu_nmi_enter(void)
> +noinstr void rcu_nmi_enter(void)
> {
> rcu_nmi_enter_common(false);
> }
> -NOKPROBE_SYMBOL(rcu_nmi_enter);
>
> /**
> * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
> @@ -897,7 +906,7 @@ NOKPROBE_SYMBOL(rcu_nmi_enter);
> * If you add or remove a call to rcu_irq_enter(), be sure to test with
> * CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_irq_enter(void)
> +noinstr void rcu_irq_enter(void)
> {
> lockdep_assert_irqs_disabled();
> rcu_nmi_enter_common(true);
> @@ -942,7 +951,7 @@ static void rcu_disable_urgency_upon_qs(
> * if the current CPU is not in its idle loop or is in an interrupt or
> * NMI handler, return true.
> */
> -bool notrace rcu_is_watching(void)
> +noinstr bool rcu_is_watching(void)
> {
> bool ret;
>
> @@ -986,7 +995,7 @@ void rcu_request_urgent_qs_task(struct t
> * RCU on an offline processor during initial boot, hence the check for
> * rcu_scheduler_fully_active.
> */
> -bool rcu_lockdep_current_cpu_online(void)
> +noinstr bool rcu_lockdep_current_cpu_online(void)
> {
> struct rcu_data *rdp;
> struct rcu_node *rnp;
> @@ -994,12 +1003,12 @@ bool rcu_lockdep_current_cpu_online(void
>
> if (in_nmi() || !rcu_scheduler_fully_active)
> return true;
> - preempt_disable();
> + preempt_disable_notrace();
> rdp = this_cpu_ptr(&rcu_data);
> rnp = rdp->mynode;
> if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
> ret = true;
> - preempt_enable();
> + preempt_enable_notrace();
> return ret;
> }
> EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -2553,7 +2553,7 @@ static void rcu_bind_gp_kthread(void)
> }
>
> /* Record the current task on dyntick-idle entry. */
> -static void rcu_dynticks_task_enter(void)
> +static void noinstr rcu_dynticks_task_enter(void)
> {
> #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
> @@ -2561,7 +2561,7 @@ static void rcu_dynticks_task_enter(void
> }
>
> /* Record no current task on dyntick-idle exit. */
> -static void rcu_dynticks_task_exit(void)
> +static void noinstr rcu_dynticks_task_exit(void)
> {
> #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -95,7 +95,7 @@ module_param(rcu_normal_after_boot, int,
> * Similarly, we avoid claiming an RCU read lock held if the current
> * CPU is offline.
> */
> -static bool rcu_read_lock_held_common(bool *ret)
> +static noinstr bool rcu_read_lock_held_common(bool *ret)
> {
> if (!debug_lockdep_rcu_enabled()) {
> *ret = 1;
> @@ -112,7 +112,7 @@ static bool rcu_read_lock_held_common(bo
> return false;
> }
>
> -int rcu_read_lock_sched_held(void)
> +noinstr int rcu_read_lock_sched_held(void)
> {
> bool ret;
>
> @@ -270,13 +270,12 @@ struct lockdep_map rcu_callback_map =
> STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
> EXPORT_SYMBOL_GPL(rcu_callback_map);
>
> -int notrace debug_lockdep_rcu_enabled(void)
> +noinstr int notrace debug_lockdep_rcu_enabled(void)
> {
> return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> current->lockdep_recursion == 0;
> }
> EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
> -NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);
>
> /**
> * rcu_read_lock_held() - might we be in RCU read-side critical section?
>

2020-05-19 19:51:27

by Joel Fernandes

Subject: Re: [patch V4 part 1 25/36] rcu/tree: Mark the idle relevant functions noinstr

Hi Thomas,

On Tue, May 05, 2020 at 03:16:27PM +0200, Thomas Gleixner wrote:
> These functions are invoked from context tracking and other places in the
> low level entry code. Move them into the .noinstr.text section to exclude
> them from instrumentation.
>
> Mark the places which are safe to invoke traceable functions with
> instr_begin/end() so objtool won't complain.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> kernel/rcu/tree.c | 85 +++++++++++++++++++++++++----------------------
> kernel/rcu/tree_plugin.h | 4 +-
> kernel/rcu/update.c | 7 +--
> 3 files changed, 52 insertions(+), 44 deletions(-)
[...]

Just a few nits/questions but otherwise LGTM.

> @@ -299,7 +294,7 @@ static void rcu_dynticks_eqs_online(void
> *
> * No ordering, as we are sampling CPU-local information.
> */
> -static bool rcu_dynticks_curr_cpu_in_eqs(void)
> +static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
> {
> struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
>
> @@ -566,7 +561,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data
> * the possibility of usermode upcalls having messed up our count
> * of interrupt nesting level during the prior busy period.
> */
> -static void rcu_eqs_enter(bool user)
> +static noinstr void rcu_eqs_enter(bool user)
> {
> struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
>
> @@ -581,12 +576,14 @@ static void rcu_eqs_enter(bool user)
> }
>
> lockdep_assert_irqs_disabled();
> + instr_begin();
> trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
> WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
> rdp = this_cpu_ptr(&rcu_data);
> do_nocb_deferred_wakeup(rdp);
> rcu_prepare_for_idle();
> rcu_preempt_deferred_qs(current);
> + instr_end();
> WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
> // RCU is watching here ...
> rcu_dynticks_eqs_enter();
> @@ -623,7 +620,7 @@ void rcu_idle_enter(void)
> * If you add or remove a call to rcu_user_enter(), be sure to test with
> * CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_user_enter(void)
> +noinstr void rcu_user_enter(void)
> {
> lockdep_assert_irqs_disabled();
> rcu_eqs_enter(true);
> @@ -656,19 +653,23 @@ static __always_inline void rcu_nmi_exit
> * leave it in non-RCU-idle state.
> */
> if (rdp->dynticks_nmi_nesting != 1) {
> + instr_begin();
> trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
> atomic_read(&rdp->dynticks));
> WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
> rdp->dynticks_nmi_nesting - 2);
> + instr_end();

Can instr_end() be moved before the WRITE_ONCE()?

> @@ -737,7 +738,7 @@ void rcu_irq_exit_irqson(void)
> * allow for the possibility of usermode upcalls messing up our count of
> * interrupt nesting level during the busy period that is just now starting.
> */
> -static void rcu_eqs_exit(bool user)
> +static void noinstr rcu_eqs_exit(bool user)
> {
> struct rcu_data *rdp;
> long oldval;
> @@ -755,12 +756,14 @@ static void rcu_eqs_exit(bool user)
> // RCU is not watching here ...
> rcu_dynticks_eqs_exit();
> // ... but is watching here.
> + instr_begin();
> rcu_cleanup_after_idle();
> trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
> WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
> WRITE_ONCE(rdp->dynticks_nesting, 1);
> WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
> WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
> + instr_end();

And here I think instr_end() can be moved after the trace_rcu_dyntick() call?

> }
>
> /**
> @@ -791,7 +794,7 @@ void rcu_idle_exit(void)
> * If you add or remove a call to rcu_user_exit(), be sure to test with
> * CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_user_exit(void)
> +void noinstr rcu_user_exit(void)
> {
> rcu_eqs_exit(1);
> }
> @@ -839,28 +842,35 @@ static __always_inline void rcu_nmi_ente
> rcu_cleanup_after_idle();
>
> incby = 1;
> - } else if (irq && tick_nohz_full_cpu(rdp->cpu) &&
> - rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
> - READ_ONCE(rdp->rcu_urgent_qs) &&
> - !READ_ONCE(rdp->rcu_forced_tick)) {
> - // We get here only if we had already exited the extended
> - // quiescent state and this was an interrupt (not an NMI).
> - // Therefore, (1) RCU is already watching and (2) The fact
> - // that we are in an interrupt handler and that the rcu_node
> - // lock is an irq-disabled lock prevents self-deadlock.
> - // So we can safely recheck under the lock.
> - raw_spin_lock_rcu_node(rdp->mynode);
> - if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
> - // A nohz_full CPU is in the kernel and RCU
> - // needs a quiescent state. Turn on the tick!
> - WRITE_ONCE(rdp->rcu_forced_tick, true);
> - tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
> + } else if (irq) {
> + instr_begin();
> + if (tick_nohz_full_cpu(rdp->cpu) &&

Not a huge fan of the extra indentation but don't see a better way :)

> + rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
> + READ_ONCE(rdp->rcu_urgent_qs) &&
> + !READ_ONCE(rdp->rcu_forced_tick)) {
> + // We get here only if we had already exited the
> + // extended quiescent state and this was an
> + // interrupt (not an NMI). Therefore, (1) RCU is
> + // already watching and (2) The fact that we are in
> + // an interrupt handler and that the rcu_node lock
> + // is an irq-disabled lock prevents self-deadlock.
> + // So we can safely recheck under the lock.
> + raw_spin_lock_rcu_node(rdp->mynode);
> + if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
> + // A nohz_full CPU is in the kernel and RCU
> + // needs a quiescent state. Turn on the tick!
> + WRITE_ONCE(rdp->rcu_forced_tick, true);
> + tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
> + }
> + raw_spin_unlock_rcu_node(rdp->mynode);
> }
> - raw_spin_unlock_rcu_node(rdp->mynode);
> + instr_end();
> }
> + instr_begin();
> trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
> rdp->dynticks_nmi_nesting,
> rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
> + instr_end();
> WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
> rdp->dynticks_nmi_nesting + incby);
> barrier();
> @@ -869,11 +879,10 @@ static __always_inline void rcu_nmi_ente
> /**
> * rcu_nmi_enter - inform RCU of entry to NMI context
> */
> -void rcu_nmi_enter(void)
> +noinstr void rcu_nmi_enter(void)
> {
> rcu_nmi_enter_common(false);
> }
> -NOKPROBE_SYMBOL(rcu_nmi_enter);
>
> /**
> * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
> @@ -897,7 +906,7 @@ NOKPROBE_SYMBOL(rcu_nmi_enter);
> * If you add or remove a call to rcu_irq_enter(), be sure to test with
> * CONFIG_RCU_EQS_DEBUG=y.
> */
> -void rcu_irq_enter(void)
> +noinstr void rcu_irq_enter(void)

Just checking if rcu_irq_enter_irqson() needs noinstr too?

Would the noinstr checking be enforced by the kernel build as well, or is the
developer required to run it themselves?

Reviewed-by: Joel Fernandes (Google) <[email protected]>

thanks,

- Joel


> {
> lockdep_assert_irqs_disabled();
> rcu_nmi_enter_common(true);
> @@ -942,7 +951,7 @@ static void rcu_disable_urgency_upon_qs(
> * if the current CPU is not in its idle loop or is in an interrupt or
> * NMI handler, return true.
> */
> -bool notrace rcu_is_watching(void)
> +noinstr bool rcu_is_watching(void)
> {
> bool ret;
>
> @@ -986,7 +995,7 @@ void rcu_request_urgent_qs_task(struct t
> * RCU on an offline processor during initial boot, hence the check for
> * rcu_scheduler_fully_active.
> */
> -bool rcu_lockdep_current_cpu_online(void)
> +noinstr bool rcu_lockdep_current_cpu_online(void)
> {
> struct rcu_data *rdp;
> struct rcu_node *rnp;
> @@ -994,12 +1003,12 @@ bool rcu_lockdep_current_cpu_online(void
>
> if (in_nmi() || !rcu_scheduler_fully_active)
> return true;
> - preempt_disable();
> + preempt_disable_notrace();
> rdp = this_cpu_ptr(&rcu_data);
> rnp = rdp->mynode;
> if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
> ret = true;
> - preempt_enable();
> + preempt_enable_notrace();
> return ret;
> }
> EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -2553,7 +2553,7 @@ static void rcu_bind_gp_kthread(void)
> }
>
> /* Record the current task on dyntick-idle entry. */
> -static void rcu_dynticks_task_enter(void)
> +static void noinstr rcu_dynticks_task_enter(void)
> {
> #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
> @@ -2561,7 +2561,7 @@ static void rcu_dynticks_task_enter(void
> }
>
> /* Record no current task on dyntick-idle exit. */
> -static void rcu_dynticks_task_exit(void)
> +static void noinstr rcu_dynticks_task_exit(void)
> {
> #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
> WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -95,7 +95,7 @@ module_param(rcu_normal_after_boot, int,
> * Similarly, we avoid claiming an RCU read lock held if the current
> * CPU is offline.
> */
> -static bool rcu_read_lock_held_common(bool *ret)
> +static noinstr bool rcu_read_lock_held_common(bool *ret)
> {
> if (!debug_lockdep_rcu_enabled()) {
> *ret = 1;
> @@ -112,7 +112,7 @@ static bool rcu_read_lock_held_common(bo
> return false;
> }
>
> -int rcu_read_lock_sched_held(void)
> +noinstr int rcu_read_lock_sched_held(void)
> {
> bool ret;
>
> @@ -270,13 +270,12 @@ struct lockdep_map rcu_callback_map =
> STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
> EXPORT_SYMBOL_GPL(rcu_callback_map);
>
> -int notrace debug_lockdep_rcu_enabled(void)
> +noinstr int notrace debug_lockdep_rcu_enabled(void)
> {
> return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
> current->lockdep_recursion == 0;
> }
> EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
> -NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);
>
> /**
> * rcu_read_lock_held() - might we be in RCU read-side critical section?
>

2020-05-19 19:56:36

by tip-bot2 for Thomas Gleixner

Subject: [tip: core/rcu] rcu/tree: Mark the idle relevant functions noinstr

The following commit has been merged into the core/rcu branch of tip:

Commit-ID: ff5c4f5cad33061b07c3fb9187506783c0f3cb66
Gitweb: https://git.kernel.org/tip/ff5c4f5cad33061b07c3fb9187506783c0f3cb66
Author: Thomas Gleixner <[email protected]>
AuthorDate: Fri, 13 Mar 2020 17:32:17 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Tue, 19 May 2020 15:51:20 +02:00

rcu/tree: Mark the idle relevant functions noinstr

These functions are invoked from context tracking and other places in the
low level entry code. Move them into the .noinstr.text section to exclude
them from instrumentation.

Mark the places which are safe to invoke traceable functions with
instrumentation_begin/end() so objtool won't complain.

Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Alexandre Chartre <[email protected]>
Acked-by: Peter Zijlstra <[email protected]>
Acked-by: Paul E. McKenney <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]


---
kernel/rcu/tree.c | 83 +++++++++++++++++++++------------------
kernel/rcu/tree_plugin.h | 4 +-
kernel/rcu/update.c | 3 +-
3 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f288477..0713ef3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -88,9 +88,6 @@
*/
#define RCU_DYNTICK_CTRL_MASK 0x1
#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
-#ifndef rcu_eqs_special_exit
-#define rcu_eqs_special_exit() do { } while (0)
-#endif

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.dynticks_nesting = 1,
@@ -242,7 +239,7 @@ void rcu_softirq_qs(void)
* RCU is watching prior to the call to this function and is no longer
* watching upon return.
*/
-static void rcu_dynticks_eqs_enter(void)
+static noinstr void rcu_dynticks_eqs_enter(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
@@ -267,7 +264,7 @@ static void rcu_dynticks_eqs_enter(void)
* called from an extended quiescent state, that is, RCU is not watching
* prior to the call to this function and is watching upon return.
*/
-static void rcu_dynticks_eqs_exit(void)
+static noinstr void rcu_dynticks_eqs_exit(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int seq;
@@ -285,8 +282,6 @@ static void rcu_dynticks_eqs_exit(void)
if (seq & RCU_DYNTICK_CTRL_MASK) {
atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
smp_mb__after_atomic(); /* _exit after clearing mask. */
- /* Prefer duplicate flushes to losing a flush. */
- rcu_eqs_special_exit();
}
}

@@ -314,7 +309,7 @@ static void rcu_dynticks_eqs_online(void)
*
* No ordering, as we are sampling CPU-local information.
*/
-static bool rcu_dynticks_curr_cpu_in_eqs(void)
+static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

@@ -603,7 +598,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
-static void rcu_eqs_enter(bool user)
+static noinstr void rcu_eqs_enter(bool user)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

@@ -618,12 +613,14 @@ static void rcu_eqs_enter(bool user)
}

lockdep_assert_irqs_disabled();
+ instrumentation_begin();
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rdp = this_cpu_ptr(&rcu_data);
do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
+ instrumentation_end();
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
// RCU is watching here ...
rcu_dynticks_eqs_enter();
@@ -660,7 +657,7 @@ void rcu_idle_enter(void)
* If you add or remove a call to rcu_user_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_user_enter(void)
+noinstr void rcu_user_enter(void)
{
lockdep_assert_irqs_disabled();
rcu_eqs_enter(true);
@@ -693,19 +690,23 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
* leave it in non-RCU-idle state.
*/
if (rdp->dynticks_nmi_nesting != 1) {
+ instrumentation_begin();
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
rdp->dynticks_nmi_nesting - 2);
+ instrumentation_end();
return;
}

+ instrumentation_begin();
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */

if (irq)
rcu_prepare_for_idle();
+ instrumentation_end();

// RCU is watching here ...
rcu_dynticks_eqs_enter();
@@ -721,7 +722,7 @@ static __always_inline void rcu_nmi_exit_common(bool irq)
* If you add or remove a call to rcu_nmi_exit(), be sure to test
* with CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_nmi_exit(void)
+void noinstr rcu_nmi_exit(void)
{
rcu_nmi_exit_common(false);
}
@@ -745,7 +746,7 @@ void rcu_nmi_exit(void)
* If you add or remove a call to rcu_irq_exit(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_irq_exit(void)
+void noinstr rcu_irq_exit(void)
{
lockdep_assert_irqs_disabled();
rcu_nmi_exit_common(true);
@@ -774,7 +775,7 @@ void rcu_irq_exit_irqson(void)
* allow for the possibility of usermode upcalls messing up our count of
* interrupt nesting level during the busy period that is just now starting.
*/
-static void rcu_eqs_exit(bool user)
+static void noinstr rcu_eqs_exit(bool user)
{
struct rcu_data *rdp;
long oldval;
@@ -792,12 +793,14 @@ static void rcu_eqs_exit(bool user)
// RCU is not watching here ...
rcu_dynticks_eqs_exit();
// ... but is watching here.
+ instrumentation_begin();
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
WRITE_ONCE(rdp->dynticks_nesting, 1);
WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
+ instrumentation_end();
}

/**
@@ -828,7 +831,7 @@ void rcu_idle_exit(void)
* If you add or remove a call to rcu_user_exit(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_user_exit(void)
+void noinstr rcu_user_exit(void)
{
rcu_eqs_exit(1);
}
@@ -876,28 +879,35 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
rcu_cleanup_after_idle();

incby = 1;
- } else if (irq && tick_nohz_full_cpu(rdp->cpu) &&
- rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
- READ_ONCE(rdp->rcu_urgent_qs) &&
- !READ_ONCE(rdp->rcu_forced_tick)) {
- // We get here only if we had already exited the extended
- // quiescent state and this was an interrupt (not an NMI).
- // Therefore, (1) RCU is already watching and (2) The fact
- // that we are in an interrupt handler and that the rcu_node
- // lock is an irq-disabled lock prevents self-deadlock.
- // So we can safely recheck under the lock.
- raw_spin_lock_rcu_node(rdp->mynode);
- if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
- // A nohz_full CPU is in the kernel and RCU
- // needs a quiescent state. Turn on the tick!
- WRITE_ONCE(rdp->rcu_forced_tick, true);
- tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ } else if (irq) {
+ instrumentation_begin();
+ if (tick_nohz_full_cpu(rdp->cpu) &&
+ rdp->dynticks_nmi_nesting == DYNTICK_IRQ_NONIDLE &&
+ READ_ONCE(rdp->rcu_urgent_qs) &&
+ !READ_ONCE(rdp->rcu_forced_tick)) {
+ // We get here only if we had already exited the
+ // extended quiescent state and this was an
+ // interrupt (not an NMI). Therefore, (1) RCU is
+ // already watching and (2) The fact that we are in
+ // an interrupt handler and that the rcu_node lock
+ // is an irq-disabled lock prevents self-deadlock.
+ // So we can safely recheck under the lock.
+ raw_spin_lock_rcu_node(rdp->mynode);
+ if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+ // A nohz_full CPU is in the kernel and RCU
+ // needs a quiescent state. Turn on the tick!
+ WRITE_ONCE(rdp->rcu_forced_tick, true);
+ tick_dep_set_cpu(rdp->cpu, TICK_DEP_BIT_RCU);
+ }
+ raw_spin_unlock_rcu_node(rdp->mynode);
}
- raw_spin_unlock_rcu_node(rdp->mynode);
+ instrumentation_end();
}
+ instrumentation_begin();
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdp->dynticks_nmi_nesting,
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
+ instrumentation_end();
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
rdp->dynticks_nmi_nesting + incby);
barrier();
@@ -906,11 +916,10 @@ static __always_inline void rcu_nmi_enter_common(bool irq)
/**
* rcu_nmi_enter - inform RCU of entry to NMI context
*/
-void rcu_nmi_enter(void)
+noinstr void rcu_nmi_enter(void)
{
rcu_nmi_enter_common(false);
}
-NOKPROBE_SYMBOL(rcu_nmi_enter);

/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -934,7 +943,7 @@ NOKPROBE_SYMBOL(rcu_nmi_enter);
* If you add or remove a call to rcu_irq_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_irq_enter(void)
+noinstr void rcu_irq_enter(void)
{
lockdep_assert_irqs_disabled();
rcu_nmi_enter_common(true);
@@ -979,7 +988,7 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
* if the current CPU is not in its idle loop or is in an interrupt or
* NMI handler, return true.
*/
-bool notrace rcu_is_watching(void)
+bool rcu_is_watching(void)
{
bool ret;

@@ -1031,12 +1040,12 @@ bool rcu_lockdep_current_cpu_online(void)

if (in_nmi() || !rcu_scheduler_fully_active)
return true;
- preempt_disable();
+ preempt_disable_notrace();
rdp = this_cpu_ptr(&rcu_data);
rnp = rdp->mynode;
if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
ret = true;
- preempt_enable();
+ preempt_enable_notrace();
return ret;
}
EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 50caa3f..3522236 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2539,7 +2539,7 @@ static void rcu_bind_gp_kthread(void)
}

/* Record the current task on dyntick-idle entry. */
-static void rcu_dynticks_task_enter(void)
+static void noinstr rcu_dynticks_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
@@ -2547,7 +2547,7 @@ static void rcu_dynticks_task_enter(void)
}

/* Record no current task on dyntick-idle exit. */
-static void rcu_dynticks_task_exit(void)
+static void noinstr rcu_dynticks_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 3ce63a9..84843ad 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -284,13 +284,12 @@ struct lockdep_map rcu_callback_map =
STATIC_LOCKDEP_MAP_INIT("rcu_callback", &rcu_callback_key);
EXPORT_SYMBOL_GPL(rcu_callback_map);

-int notrace debug_lockdep_rcu_enabled(void)
+noinstr int notrace debug_lockdep_rcu_enabled(void)
{
return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks &&
current->lockdep_recursion == 0;
}
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
-NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);

/**
* rcu_read_lock_held() - might we be in RCU read-side critical section?

2020-09-28 23:24:34

by Paul E. McKenney

Subject: Re: [tip: core/rcu] rcu/tree: Mark the idle relevant functions noinstr

On Mon, Sep 28, 2020 at 05:22:33PM -0500, Kim Phillips wrote:
> Hi,
>
> On 5/19/20 2:52 PM, tip-bot2 for Thomas Gleixner wrote:
> > The following commit has been merged into the core/rcu branch of tip:
> >
> > Commit-ID: ff5c4f5cad33061b07c3fb9187506783c0f3cb66
> > Gitweb: https://git.kernel.org/tip/ff5c4f5cad33061b07c3fb9187506783c0f3cb66
> > Author: Thomas Gleixner <[email protected]>
> > AuthorDate: Fri, 13 Mar 2020 17:32:17 +01:00
> > Committer: Thomas Gleixner <[email protected]>
> > CommitterDate: Tue, 19 May 2020 15:51:20 +02:00
> >
> > rcu/tree: Mark the idle relevant functions noinstr
> >
> > These functions are invoked from context tracking and other places in the
> > low level entry code. Move them into the .noinstr.text section to exclude
> > them from instrumentation.
> >
> > Mark the places which are safe to invoke traceable functions with
> > instrumentation_begin/end() so objtool won't complain.
> >
> > Signed-off-by: Thomas Gleixner <[email protected]>
> > Reviewed-by: Alexandre Chartre <[email protected]>
> > Acked-by: Peter Zijlstra <[email protected]>
> > Acked-by: Paul E. McKenney <[email protected]>
> > Link: https://lkml.kernel.org/r/[email protected]
> >
> >
> > ---
>
> I bisected a system hang condition down to this commit.
>
> To reproduce the hang, compile the below code and execute it as root
> on an x86_64 server (AMD or Intel). The code is opening a
> PERF_TYPE_TRACEPOINT event with a non-zero pe.config.
>
> If I revert the commit from Linus' ToT, the system stays up.

"Linus' ToT" is current mainline? If so, what does your revert look like?
Over here that revert wants to be hand applied for current mainline.

Thanx, Paul

> .config attached.
>
> Thanks,
>
> Kim
>
> #include <stdlib.h>
> #include <stdio.h>
> #include <unistd.h>
> #include <string.h>
> #include <sys/ioctl.h>
> #include <linux/perf_event.h>
> #include <asm/unistd.h>
>
> static long
> perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
> int cpu, int group_fd, unsigned long flags)
> {
> int ret;
>
> ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
> group_fd, flags);
> return ret;
> }
>
> int
> main(int argc, char **argv)
> {
> struct perf_event_attr pe;
> long long count;
> int fd;
>
> memset(&pe, 0, sizeof(struct perf_event_attr));
> pe.type = PERF_TYPE_TRACEPOINT;
> pe.size = sizeof(struct perf_event_attr);
> pe.config = PERF_COUNT_HW_INSTRUCTIONS;
> pe.disabled = 1;
> pe.exclude_kernel = 1;
> pe.exclude_hv = 1;
>
> fd = perf_event_open(&pe, 0, -1, -1, 0);
> if (fd == -1) {
> fprintf(stderr, "Error opening leader %llx\n", pe.config);
> exit(EXIT_FAILURE);
> }
> }


2020-09-28 23:57:40

by Kim Phillips

Subject: Re: [tip: core/rcu] rcu/tree: Mark the idle relevant functions noinstr

Hi,

On 5/19/20 2:52 PM, tip-bot2 for Thomas Gleixner wrote:
> The following commit has been merged into the core/rcu branch of tip:
>
> Commit-ID: ff5c4f5cad33061b07c3fb9187506783c0f3cb66
> Gitweb: https://git.kernel.org/tip/ff5c4f5cad33061b07c3fb9187506783c0f3cb66
> Author: Thomas Gleixner <[email protected]>
> AuthorDate: Fri, 13 Mar 2020 17:32:17 +01:00
> Committer: Thomas Gleixner <[email protected]>
> CommitterDate: Tue, 19 May 2020 15:51:20 +02:00
>
> rcu/tree: Mark the idle relevant functions noinstr
>
> These functions are invoked from context tracking and other places in the
> low level entry code. Move them into the .noinstr.text section to exclude
> them from instrumentation.
>
> Mark the places which are safe to invoke traceable functions with
> instrumentation_begin/end() so objtool won't complain.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Reviewed-by: Alexandre Chartre <[email protected]>
> Acked-by: Peter Zijlstra <[email protected]>
> Acked-by: Paul E. McKenney <[email protected]>
> Link: https://lkml.kernel.org/r/[email protected]
>
>
> ---

I bisected a system hang condition down to this commit.

To reproduce the hang, compile the below code and execute it as root
on an x86_64 server (AMD or Intel). The code is opening a
PERF_TYPE_TRACEPOINT event with a non-zero pe.config.

If I revert the commit from Linus' ToT, the system stays up.

.config attached.

Thanks,

Kim

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>

static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
int cpu, int group_fd, unsigned long flags)
{
int ret;

ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
group_fd, flags);
return ret;
}

int
main(int argc, char **argv)
{
struct perf_event_attr pe;
long long count;
int fd;

memset(&pe, 0, sizeof(struct perf_event_attr));
pe.type = PERF_TYPE_TRACEPOINT;
pe.size = sizeof(struct perf_event_attr);
pe.config = PERF_COUNT_HW_INSTRUCTIONS;
pe.disabled = 1;
pe.exclude_kernel = 1;
pe.exclude_hv = 1;

fd = perf_event_open(&pe, 0, -1, -1, 0);
if (fd == -1) {
fprintf(stderr, "Error opening leader %llx\n", pe.config);
exit(EXIT_FAILURE);
}
}


Attachments:
config.xz (28.80 kB)

2020-09-29 07:29:58

by Peter Zijlstra

Subject: Re: [tip: core/rcu] rcu/tree: Mark the idle relevant functions noinstr

On Mon, Sep 28, 2020 at 05:22:33PM -0500, Kim Phillips wrote:
> On 5/19/20 2:52 PM, tip-bot2 for Thomas Gleixner wrote:
> > The following commit has been merged into the core/rcu branch of tip:
> >
> > Commit-ID: ff5c4f5cad33061b07c3fb9187506783c0f3cb66
> > Gitweb: https://git.kernel.org/tip/ff5c4f5cad33061b07c3fb9187506783c0f3cb66
> > Author: Thomas Gleixner <[email protected]>
> > AuthorDate: Fri, 13 Mar 2020 17:32:17 +01:00
> > Committer: Thomas Gleixner <[email protected]>
> > CommitterDate: Tue, 19 May 2020 15:51:20 +02:00
> >
> > rcu/tree: Mark the idle relevant functions noinstr
> >
> > These functions are invoked from context tracking and other places in the
> > low level entry code. Move them into the .noinstr.text section to exclude
> > them from instrumentation.
> >
> > Mark the places which are safe to invoke traceable functions with
> > instrumentation_begin/end() so objtool won't complain.
> >
> > Signed-off-by: Thomas Gleixner <[email protected]>
> > Reviewed-by: Alexandre Chartre <[email protected]>
> > Acked-by: Peter Zijlstra <[email protected]>
> > Acked-by: Paul E. McKenney <[email protected]>
> > Link: https://lkml.kernel.org/r/[email protected]
> >
> >
> > ---
>
> I bisected a system hang condition down to this commit.

That's odd; mostly it's the lack of noinstr that causes hangs. I've never
yet seen the presence of it cause problems.

> To reproduce the hang, compile the below code and execute it as root
> on an x86_64 server (AMD or Intel). The code is opening a
> PERF_TYPE_TRACEPOINT event with a non-zero pe.config.

In my experience, it is very relevant which exact tracepoint you end up
using.

PERF_COUNT_HW_INSTRUCTIONS is a very long and tedious way of writing 1
in this case. On my randomly selected test box this morning, trace event
1 is:

$ for file in /debug/tracing/events/*/*/id ; do echo $file -- $(cat $file); done | grep " 1$"
/debug/tracing/events/ftrace/function/id -- 1
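
(PERF_COUNT_HW_INSTRUCTIONS is literally 1 in the uapi enum, the relevant
part of include/uapi/linux/perf_event.h being the lines below, so with
PERF_TYPE_TRACEPOINT the reproducer's pe.config selects trace event id 1
rather than any hardware counter:)

enum perf_hw_id {
	PERF_COUNT_HW_CPU_CYCLES	= 0,
	PERF_COUNT_HW_INSTRUCTIONS	= 1,
	/* ... */
};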

> If I revert the commit from Linus' ToT, the system stays up.

> memset(&pe, 0, sizeof(struct perf_event_attr));
> pe.type = PERF_TYPE_TRACEPOINT;
> pe.size = sizeof(struct perf_event_attr);
> pe.config = PERF_COUNT_HW_INSTRUCTIONS;
> pe.disabled = 1;

Doubly funny for not actually enabling the event...

> pe.exclude_kernel = 1;
> pe.exclude_hv = 1;

Still, it seems to make my machine unhappy... Let's see if I can get
anything useful out of it.

2020-09-29 11:43:34

by Peter Zijlstra

Subject: Re: [tip: core/rcu] rcu/tree: Mark the idle relevant functions noinstr

On Tue, May 19, 2020 at 07:52:33PM -0000, tip-bot2 for Thomas Gleixner wrote:
> @@ -979,7 +988,7 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
> * if the current CPU is not in its idle loop or is in an interrupt or
> * NMI handler, return true.
> */
> -bool notrace rcu_is_watching(void)
> +bool rcu_is_watching(void)
> {
> bool ret;
>

This ^..

it is required because __ftrace_ops_list_func() /
ftrace_ops_assist_func() call it outside of ftrace recursion, but only
when FL_RCU, and perf happens to be the only user of that.

another morning wasted... :/
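
For reference, a sketch of the ordering that bites here, reconstructed from
the lines removed by the fix in the follow-up below (abridged; signature as
in kernel/trace/ftrace.c at the time):

static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
				   struct ftrace_ops *op, struct pt_regs *regs)
{
	int bit;

	/*
	 * This check runs before the recursion bit is taken. Once
	 * rcu_is_watching() is no longer notrace, the call below is itself
	 * function-traced, re-enters this handler and recurses until the
	 * machine hangs.
	 */
	if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching())
		return;

	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
	if (bit < 0)
		return;
	/* ... */
}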

2020-09-29 14:37:00

by Steven Rostedt

Subject: Re: [tip: core/rcu] rcu/tree: Mark the idle relevant functions noinstr

On Tue, 29 Sep 2020 13:25:41 +0200
Peter Zijlstra <[email protected]> wrote:

> On Tue, May 19, 2020 at 07:52:33PM -0000, tip-bot2 for Thomas Gleixner wrote:
> > @@ -979,7 +988,7 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
> > * if the current CPU is not in its idle loop or is in an interrupt or
> > * NMI handler, return true.
> > */
> > -bool notrace rcu_is_watching(void)
> > +bool rcu_is_watching(void)
> > {
> > bool ret;
> >
>
> This ^..
>
> it is required because __ftrace_ops_list_func() /
> ftrace_ops_assist_func() call it outside of ftrace recursion, but only
> when FL_RCU, and perf happens to be the only user of that.
>
> another morning wasted... :/

Those are fun to debug. :-p (I'll use this in another email).

Anyway, you bring up a good point. I should have this:

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 84f32dbc7be8..2d76eaaad4a7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6993,16 +6993,14 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
{
int bit;

- if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching())
- return;
-
bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
if (bit < 0)
return;

preempt_disable_notrace();

- op->func(ip, parent_ip, op, regs);
+ if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
+ op->func(ip, parent_ip, op, regs);

preempt_enable_notrace();
trace_clear_recursion(bit);
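
With that hunk applied, the rcu_is_watching() call is covered by the
recursion protection; a sketch of the resulting ordering (surrounding
code elided):

	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
	if (bit < 0)
		return;

	preempt_disable_notrace();

	/*
	 * The recursion bit is already held here: even though
	 * rcu_is_watching() is traceable again, a nested invocation of
	 * this handler bails out at the recursion check above instead of
	 * recursing, while ops that need RCU are still skipped when RCU
	 * is not watching.
	 */
	if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
		op->func(ip, parent_ip, op, regs);

	preempt_enable_notrace();
	trace_clear_recursion(bit);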



-- Steve

2020-09-29 14:57:34

by Peter Zijlstra

Subject: Re: [tip: core/rcu] rcu/tree: Mark the idle relevant functions noinstr

On Tue, Sep 29, 2020 at 10:34:54AM -0400, Steven Rostedt wrote:
> Anyway, you bring up a good point. I should have this:

Acked-by: Peter Zijlstra (Intel) <[email protected]>

> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 84f32dbc7be8..2d76eaaad4a7 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -6993,16 +6993,14 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
> {
> int bit;
>
> - if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching())
> - return;
> -
> bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
> if (bit < 0)
> return;
>
> preempt_disable_notrace();
>
> - op->func(ip, parent_ip, op, regs);
> + if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
> + op->func(ip, parent_ip, op, regs);
>
> preempt_enable_notrace();
> trace_clear_recursion(bit);
>
>
>
> -- Steve