2024-05-30 13:46:11

by Frederic Weisbecker

Subject: [PATCH 00/11] rcu/nocb: (De-)offloading on offline CPUs

Debates at the last LPC seem to have reached a general agreement that
the nohz_full cpusets interface should operate on offline CPUs, in
order to simplify the picture. And since the only known future user of
NOCB (de-)offloading is going to be nohz_full cpusets, its transitions
need to operate on offline CPUs as well.

The good news is that this simplifies the (de-)offloading code a bit,
as the diffstat testifies.

Thanks.

Frederic Weisbecker (11):
rcu/nocb: Introduce RCU_NOCB_LOCKDEP_WARN()
rcu/nocb: Move nocb field at the end of state struct
rcu/nocb: Assert no callbacks while nocb kthread allocation fails
rcu/nocb: Introduce nocb mutex
rcu/nocb: (De-)offload callbacks on offline CPUs only
rcu/nocb: Remove halfway (de-)offloading handling from bypass
rcu/nocb: Remove halfway (de-)offloading handling from rcu_core()'s QS
reporting
rcu/nocb: Remove halfway (de-)offloading handling from rcu_core
rcu/nocb: Remove SEGCBLIST_RCU_CORE
rcu/nocb: Remove SEGCBLIST_KTHREAD_CB
rcu/nocb: Simplify (de-)offloading state machine

include/linux/rcu_segcblist.h | 6 +-
include/linux/rcupdate.h | 7 +
kernel/rcu/rcu_segcblist.c | 11 --
kernel/rcu/rcu_segcblist.h | 11 +-
kernel/rcu/tree.c | 45 +-----
kernel/rcu/tree.h | 6 +-
kernel/rcu/tree_nocb.h | 266 +++++++++++++---------------------
kernel/rcu/tree_plugin.h | 5 +-
8 files changed, 122 insertions(+), 235 deletions(-)

--
2.45.1



2024-05-30 13:46:28

by Frederic Weisbecker

Subject: [PATCH 01/11] rcu/nocb: Introduce RCU_NOCB_LOCKDEP_WARN()

Checking for races against concurrent (de-)offloading implies the
creation of !CONFIG_RCU_NOCB_CPU stubs to check whether each relevant
lock is held. For now only the nocb_lock is involved, but more locks
are to be expected.

Instead, create a NOCB-specific version of RCU_LOCKDEP_WARN() to avoid
the proliferation of stubs.
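
For illustration, here is a minimal hypothetical caller showing why the
empty !CONFIG_RCU_NOCB_CPU stub suffices: since the stub expands to
nothing, its arguments are discarded at preprocessing time, so a lock
expression referencing a NOCB-only field never reaches the compiler.

    /* Hypothetical caller, for illustration only. */
    static void assert_nocb_locked(struct rcu_data *rdp)
    {
            /*
             * CONFIG_RCU_NOCB_CPU=y: expands to RCU_LOCKDEP_WARN().
             * CONFIG_RCU_NOCB_CPU=n: the whole statement vanishes at
             * preprocessing time, so &rdp->nocb_lock (a field only
             * built with NOCB) is never compiled and no stub is needed.
             */
            RCU_NOCB_LOCKDEP_WARN(!lockdep_is_held(&rdp->nocb_lock),
                                  "nocb_lock should be held");
    }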

Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/rcupdate.h | 7 +++++++
kernel/rcu/tree_nocb.h | 14 --------------
kernel/rcu/tree_plugin.h | 4 ++--
3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index be450a3477be..9161e00a0cf4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -145,11 +145,18 @@ void rcu_init_nohz(void);
int rcu_nocb_cpu_offload(int cpu);
int rcu_nocb_cpu_deoffload(int cpu);
void rcu_nocb_flush_deferred_wakeup(void);
+
+#define RCU_NOCB_LOCKDEP_WARN(c, s) RCU_LOCKDEP_WARN(c, s)
+
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+
static inline void rcu_init_nohz(void) { }
static inline int rcu_nocb_cpu_offload(int cpu) { return -EINVAL; }
static inline int rcu_nocb_cpu_deoffload(int cpu) { return 0; }
static inline void rcu_nocb_flush_deferred_wakeup(void) { }
+
+#define RCU_NOCB_LOCKDEP_WARN(c, s)
+
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */

/*
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 3ce30841119a..f4112fc663a7 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -16,10 +16,6 @@
#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
-static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
-{
- return lockdep_is_held(&rdp->nocb_lock);
-}

static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
@@ -1653,16 +1649,6 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)

#else /* #ifdef CONFIG_RCU_NOCB_CPU */

-static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
-{
- return 0;
-}
-
-static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
-{
- return false;
-}
-
/* No ->nocb_lock to acquire. */
static void rcu_nocb_lock(struct rcu_data *rdp)
{
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 51125f4130fd..0d6b152a9a17 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -24,10 +24,10 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
* timers have their own means of synchronization against the
* offloaded state updaters.
*/
- RCU_LOCKDEP_WARN(
+ RCU_NOCB_LOCKDEP_WARN(
!(lockdep_is_held(&rcu_state.barrier_mutex) ||
(IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
- rcu_lockdep_is_held_nocb(rdp) ||
+ lockdep_is_held(&rdp->nocb_lock) ||
(!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) &&
rdp == this_cpu_ptr(&rcu_data)) ||
rcu_current_is_nocb_kthread(rdp)),
--
2.45.1


2024-05-30 13:46:37

by Frederic Weisbecker

Subject: [PATCH 02/11] rcu/nocb: Move nocb field at the end of state struct

nocb_is_setup is a rarely used field, accessed mostly at boot and
during CPU hotplug. It shouldn't occupy the middle of the rcu_state
structure's cacheline of hot fields.

Move it to the end and, while at it, build it conditionally. More cold
NOCB fields are to come.
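
For illustration, the intended layout discipline looks like this
(hypothetical struct, not the real rcu_state):

    /* Hypothetical example, for illustration only. */
    struct example_state {
            /* Hot fields, touched on every grace period. */
            unsigned long gp_seq;
            unsigned long gp_flags;

            /*
             * Cold fields, written only at boot or during CPU hotplug,
             * parked at the tail so they don't share a cacheline with
             * the hot fields above.
             */
    #ifdef CONFIG_EXAMPLE_FEATURE
            int feature_is_setup;
    #endif
    };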

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/rcu/tree.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index fcf2b4aa3441..a297dc89a09c 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -411,7 +411,6 @@ struct rcu_state {
arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
/* Synchronize offline with */
/* GP pre-initialization. */
- int nocb_is_setup; /* nocb is setup from boot */

/* synchronize_rcu() part. */
struct llist_head srs_next; /* request a GP users. */
@@ -420,6 +419,10 @@ struct rcu_state {
struct sr_wait_node srs_wait_nodes[SR_NORMAL_GP_WAIT_HEAD_MAX];
struct work_struct srs_cleanup_work;
atomic_t srs_cleanups_pending; /* srs inflight worker cleanups. */
+
+#ifdef CONFIG_RCU_NOCB_CPU
+ int nocb_is_setup; /* nocb is setup from boot */
+#endif
};

/* Values for rcu_state structure's gp_flags field. */
--
2.45.1


2024-05-30 13:47:17

by Frederic Weisbecker

Subject: [PATCH 04/11] rcu/nocb: Introduce nocb mutex

The barrier_mutex is currently used to protect (de-)offloading
operations, to prevent nocb_lock imbalance in rcu_barrier() and the
shrinker, and to prevent misordered RCU barrier invocation.

Now, since RCU (de-)offloading is going to happen on offline CPUs, an
RCU barrier will have to be executed while transitioning from the
offloaded to the de-offloaded state. And this can't happen while
holding the barrier_mutex.

Introduce a NOCB mutex to protect (de-)offloading transitions. The
barrier_mutex is still held for now, where necessary, to avoid
reordering of barrier callbacks and nocb_lock imbalance.
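
For reference, a sketch of the resulting lock ordering, distilled from
the hunks below:

    cpus_read_lock();                       /* CPU hotplug */
    mutex_lock(&rcu_state.nocb_mutex);      /* (de-)offloading */
    mutex_lock(&rcu_state.barrier_mutex);   /* still taken, for now */
    /* ... (de-)offload the rdp ... */
    mutex_unlock(&rcu_state.barrier_mutex);
    mutex_unlock(&rcu_state.nocb_mutex);
    cpus_read_unlock();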

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/rcu/tree.c | 3 +++
kernel/rcu/tree.h | 1 +
kernel/rcu/tree_nocb.h | 20 ++++++++++++--------
kernel/rcu/tree_plugin.h | 1 +
4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4cbc4e78a8c5..e904c187c281 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,6 +97,9 @@ static struct rcu_state rcu_state = {
.srs_cleanup_work = __WORK_INITIALIZER(rcu_state.srs_cleanup_work,
rcu_sr_normal_gp_cleanup_work),
.srs_cleanups_pending = ATOMIC_INIT(0),
+#ifdef CONFIG_RCU_NOCB_CPU
+ .nocb_mutex = __MUTEX_INITIALIZER(rcu_state.nocb_mutex),
+#endif
};

/* Dump rcu_node combining tree at boot to verify correct setup. */
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index a297dc89a09c..16e6fe63d93c 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -421,6 +421,7 @@ struct rcu_state {
atomic_t srs_cleanups_pending; /* srs inflight worker cleanups. */

#ifdef CONFIG_RCU_NOCB_CPU
+ struct mutex nocb_mutex; /* Guards (de-)offloading */
int nocb_is_setup; /* nocb is setup from boot */
#endif
};
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index fdd0616f2fd1..16bcb8b13a5e 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1141,6 +1141,7 @@ int rcu_nocb_cpu_deoffload(int cpu)
int ret = 0;

cpus_read_lock();
+ mutex_lock(&rcu_state.nocb_mutex);
mutex_lock(&rcu_state.barrier_mutex);
if (rcu_rdp_is_offloaded(rdp)) {
if (cpu_online(cpu)) {
@@ -1153,6 +1154,7 @@ int rcu_nocb_cpu_deoffload(int cpu)
}
}
mutex_unlock(&rcu_state.barrier_mutex);
+ mutex_unlock(&rcu_state.nocb_mutex);
cpus_read_unlock();

return ret;
@@ -1228,6 +1230,7 @@ int rcu_nocb_cpu_offload(int cpu)
int ret = 0;

cpus_read_lock();
+ mutex_lock(&rcu_state.nocb_mutex);
mutex_lock(&rcu_state.barrier_mutex);
if (!rcu_rdp_is_offloaded(rdp)) {
if (cpu_online(cpu)) {
@@ -1240,6 +1243,7 @@ int rcu_nocb_cpu_offload(int cpu)
}
}
mutex_unlock(&rcu_state.barrier_mutex);
+ mutex_unlock(&rcu_state.nocb_mutex);
cpus_read_unlock();

return ret;
@@ -1257,7 +1261,7 @@ lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
return 0;

/* Protect rcu_nocb_mask against concurrent (de-)offloading. */
- if (!mutex_trylock(&rcu_state.barrier_mutex))
+ if (!mutex_trylock(&rcu_state.nocb_mutex))
return 0;

/* Snapshot count of all CPUs */
@@ -1267,7 +1271,7 @@ lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
count += READ_ONCE(rdp->lazy_len);
}

- mutex_unlock(&rcu_state.barrier_mutex);
+ mutex_unlock(&rcu_state.nocb_mutex);

return count ? count : SHRINK_EMPTY;
}
@@ -1285,9 +1289,9 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
* Protect against concurrent (de-)offloading. Otherwise nocb locking
* may be ignored or imbalanced.
*/
- if (!mutex_trylock(&rcu_state.barrier_mutex)) {
+ if (!mutex_trylock(&rcu_state.nocb_mutex)) {
/*
- * But really don't insist if barrier_mutex is contended since we
+ * But really don't insist if nocb_mutex is contended since we
* can't guarantee that it will never engage in a dependency
* chain involving memory allocation. The lock is seldom contended
* anyway.
@@ -1326,7 +1330,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
break;
}

- mutex_unlock(&rcu_state.barrier_mutex);
+ mutex_unlock(&rcu_state.nocb_mutex);

return count ? count : SHRINK_STOP;
}
@@ -1473,15 +1477,15 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
* No need to protect against concurrent rcu_barrier()
* because the number of callbacks should be 0 for a non-boot CPU,
* therefore rcu_barrier() shouldn't even try to grab the nocb_lock.
- * But hold barrier_mutex to avoid nocb_lock imbalance from shrinker.
+ * But hold nocb_mutex to avoid nocb_lock imbalance from shrinker.
*/
WARN_ON_ONCE(system_state > SYSTEM_BOOTING && rcu_segcblist_n_cbs(&rdp->cblist));
- mutex_lock(&rcu_state.barrier_mutex);
+ mutex_lock(&rcu_state.nocb_mutex);
if (rcu_rdp_is_offloaded(rdp)) {
rcu_nocb_rdp_deoffload(rdp);
cpumask_clear_cpu(cpu, rcu_nocb_mask);
}
- mutex_unlock(&rcu_state.barrier_mutex);
+ mutex_unlock(&rcu_state.nocb_mutex);
}

/* How many CB CPU IDs per GP kthread? Default of -1 for sqrt(nr_cpu_ids). */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d6b152a9a17..05239042a08b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -28,6 +28,7 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
!(lockdep_is_held(&rcu_state.barrier_mutex) ||
(IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
lockdep_is_held(&rdp->nocb_lock) ||
+ lockdep_is_held(&rcu_state.nocb_mutex) ||
(!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) &&
rdp == this_cpu_ptr(&rcu_data)) ||
rcu_current_is_nocb_kthread(rdp)),
--
2.45.1


2024-05-30 13:47:56

by Frederic Weisbecker

Subject: [PATCH 03/11] rcu/nocb: Assert no callbacks while nocb kthread allocation fails

When a NOCB CPU fails to create a nocb kthread on bringup, the CPU is
then deoffloaded. The barrier mutex is locked at this stage. It is
typically used to protect against concurrent (de-)offloading and/or
concurrent rcu_barrier() that would otherwise risk a nocb locking
imbalance. However:

* rcu_barrier() can't run concurrently if the failing CPU is the boot
CPU during early boot-up.

* rcu_barrier() can run concurrently if the failing CPU is a secondary
CPU, but it is expected to see 0 callbacks on this target because
this is the first time it boots.

* (de-)offloading can't happen concurrently with smp_init(), as
rcutorture is initialized later, no earlier than device_initcall(),
and userspace isn't available yet.

* (de-)offloading can't happen concurrently with cpu_up(), courtesy of
cpu_hotplug_lock.

But:

* The lazy shrinker might run concurrently with cpu_up(). It shouldn't
try to grab the nocb_lock and risk an imbalance, because lazy_len is
supposed to be 0, but be extra cautious anyway.

* Also be cautious about potential subtleties around resume from
hibernation.

So keep the locking and add some assertions and comments.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/rcu/tree_nocb.h | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index f4112fc663a7..fdd0616f2fd1 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1442,7 +1442,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
"rcuog/%d", rdp_gp->cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) {
mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
- goto end;
+ goto err;
}
WRITE_ONCE(rdp_gp->nocb_gp_kthread, t);
if (kthread_prio)
@@ -1454,7 +1454,7 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
t = kthread_create(rcu_nocb_cb_kthread, rdp,
"rcuo%c/%d", rcu_state.abbr, cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
- goto end;
+ goto err;

if (rcu_rdp_is_offloaded(rdp))
wake_up_process(t);
@@ -1467,7 +1467,15 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
WRITE_ONCE(rdp->nocb_cb_kthread, t);
WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread);
return;
-end:
+
+err:
+ /*
+ * No need to protect against concurrent rcu_barrier()
+ * because the number of callbacks should be 0 for a non-boot CPU,
+ * therefore rcu_barrier() shouldn't even try to grab the nocb_lock.
+ * But hold barrier_mutex to avoid nocb_lock imbalance from shrinker.
+ */
+ WARN_ON_ONCE(system_state > SYSTEM_BOOTING && rcu_segcblist_n_cbs(&rdp->cblist));
mutex_lock(&rcu_state.barrier_mutex);
if (rcu_rdp_is_offloaded(rdp)) {
rcu_nocb_rdp_deoffload(rdp);
--
2.45.1


2024-05-30 13:48:05

by Frederic Weisbecker

Subject: [PATCH 06/11] rcu/nocb: Remove halfway (de-)offloading handling from bypass

Bypass enqueue can't happen anymore in the middle of (de-)offloading
since this sort of transition now only applies to offline CPUs.

The related safety check can therefore be removed.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/rcu/tree_nocb.h | 8 --------
1 file changed, 8 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 8e766396df3a..af44e75eb0cd 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -409,14 +409,6 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
return false;
}

- // In the process of (de-)offloading: no bypassing, but
- // locking.
- if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
- rcu_nocb_lock(rdp);
- *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
- return false; /* Not offloaded, no bypassing. */
- }
-
// Don't use ->nocb_bypass during early boot.
if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
rcu_nocb_lock(rdp);
--
2.45.1


2024-05-30 13:48:45

by Frederic Weisbecker

Subject: [PATCH 08/11] rcu/nocb: Remove halfway (de-)offloading handling from rcu_core

RCU core can't be running anymore while in the middle of (de-)offloading
since this sort of transition now only applies to offline CPUs.

The locked callback acceleration handling during the transition can
therefore be removed, along with concurrent batch execution.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/rcu/tree.c | 26 ++++----------------------
1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c502a38dc5f3..4f1863a0536d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2776,24 +2776,6 @@ static __latent_entropy void rcu_core(void)
unsigned long flags;
struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
- /*
- * On RT rcu_core() can be preempted when IRQs aren't disabled.
- * Therefore this function can race with concurrent NOCB (de-)offloading
- * on this CPU and the below condition must be considered volatile.
- * However if we race with:
- *
- * _ Offloading: In the worst case we accelerate or process callbacks
- * concurrently with NOCB kthreads. We are guaranteed to
- * call rcu_nocb_lock() if that happens.
- *
- * _ Deoffloading: In the worst case we miss callbacks acceleration or
- * processing. This is fine because the early stage
- * of deoffloading invokes rcu_core() after setting
- * SEGCBLIST_RCU_CORE. So we guarantee that we'll process
- * what could have been dismissed without the need to wait
- * for the next rcu_pending() check in the next jiffy.
- */
- const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);

if (cpu_is_offline(smp_processor_id()))
return;
@@ -2813,17 +2795,17 @@ static __latent_entropy void rcu_core(void)

/* No grace period and unregistered callbacks? */
if (!rcu_gp_in_progress() &&
- rcu_segcblist_is_enabled(&rdp->cblist) && do_batch) {
- rcu_nocb_lock_irqsave(rdp, flags);
+ rcu_segcblist_is_enabled(&rdp->cblist) && !rcu_rdp_is_offloaded(rdp)) {
+ local_irq_save(flags);
if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
rcu_accelerate_cbs_unlocked(rnp, rdp);
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ local_irq_restore(flags);
}

rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());

/* If there are callbacks ready, invoke them. */
- if (do_batch && rcu_segcblist_ready_cbs(&rdp->cblist) &&
+ if (!rcu_rdp_is_offloaded(rdp) && rcu_segcblist_ready_cbs(&rdp->cblist) &&
likely(READ_ONCE(rcu_scheduler_fully_active))) {
rcu_do_batch(rdp);
/* Re-invoke RCU core processing if there are callbacks remaining. */
--
2.45.1


2024-05-30 13:48:58

by Frederic Weisbecker

Subject: [PATCH 09/11] rcu/nocb: Remove SEGCBLIST_RCU_CORE

RCU core can't be running anymore while in the middle of (de-)offloading
since this sort of transition now only applies to offline CPUs.

The SEGCBLIST_RCU_CORE state can therefore be removed.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/rcu_segcblist.h | 9 ++++-----
kernel/rcu/rcu_segcblist.h | 9 ---------
kernel/rcu/tree.c | 3 ---
kernel/rcu/tree_nocb.h | 9 ---------
4 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index ba95c06675e1..5469c54cd778 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -185,11 +185,10 @@ struct rcu_cblist {
* ----------------------------------------------------------------------------
*/
#define SEGCBLIST_ENABLED BIT(0)
-#define SEGCBLIST_RCU_CORE BIT(1)
-#define SEGCBLIST_LOCKING BIT(2)
-#define SEGCBLIST_KTHREAD_CB BIT(3)
-#define SEGCBLIST_KTHREAD_GP BIT(4)
-#define SEGCBLIST_OFFLOADED BIT(5)
+#define SEGCBLIST_LOCKING BIT(1)
+#define SEGCBLIST_KTHREAD_CB BIT(2)
+#define SEGCBLIST_KTHREAD_GP BIT(3)
+#define SEGCBLIST_OFFLOADED BIT(4)

struct rcu_segcblist {
struct rcu_head *head;
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 4fe877f5f654..7a0962dfee86 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -95,15 +95,6 @@ static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
return false;
}

-static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp)
-{
- if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- !rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE))
- return true;
-
- return false;
-}
-
/*
* Are all segments following the specified segment of the specified
* rcu_segcblist structure empty of callbacks? (The specified
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4f1863a0536d..8bec3c0c9636 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -79,9 +79,6 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *);

static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.gpwrap = true,
-#ifdef CONFIG_RCU_NOCB_CPU
- .cblist.flags = SEGCBLIST_RCU_CORE,
-#endif
};
static struct rcu_state rcu_state = {
.level = { &rcu_state.node[0] },
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index af44e75eb0cd..24daf606de0c 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1060,7 +1060,6 @@ static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));

- rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
wake_gp = rdp_offload_toggle(rdp, false, flags);

mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
@@ -1168,13 +1167,6 @@ static int rcu_nocb_rdp_offload(struct rcu_data *rdp)
swait_event_exclusive(rdp->nocb_state_wq,
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));

- /*
- * All kthreads are ready to work, we can finally enable nocb bypass.
- */
- rcu_nocb_lock_irqsave(rdp, flags);
- rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
- rcu_nocb_unlock_irqrestore(rdp, flags);
-
return 0;
}

@@ -1350,7 +1342,6 @@ void __init rcu_init_nohz(void)
rcu_segcblist_init(&rdp->cblist);
rcu_segcblist_offload(&rdp->cblist, true);
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
- rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
}
rcu_organize_nocb_kthreads();
}
--
2.45.1


2024-05-30 13:49:23

by Frederic Weisbecker

Subject: [PATCH 11/11] rcu/nocb: Simplify (de-)offloading state machine

Now that the (de-)offloading process can only apply to offline CPUs,
there is no more concurrency between rcu_core() and the nocb kthreads.
Also the mutation now happens on empty queues.

Therefore the state machine can be reduced to a single bit called
SEGCBLIST_OFFLOADED. Simplify the transitions as follows (a distilled
sketch follows the list):

* Upon offloading: queue the rdp to be added to the rcuog list and
wait for the rcuog kthread to set the SEGCBLIST_OFFLOADED bit. Then
unpark the rcuo kthread.

* Upon de-offloading: park the rcuo kthread. Queue the rdp to be
removed from the rcuog list and wait for the rcuog kthread to clear
the SEGCBLIST_OFFLOADED bit.
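
Distilled sketch of the two transitions, as seen from the toggling
side (illustrative; the rcuog wake-up and error handling are elided,
and the function names are those introduced below):

    /* Offloading an offline rdp. */
    rcu_nocb_queue_toggle_rdp(rdp);         /* hand rdp over to rcuog */
    swait_event_exclusive(rdp->nocb_state_wq,
                          rcu_nocb_rdp_offload_wait_cond(rdp));
    kthread_unpark(rdp->nocb_cb_kthread);   /* rcuo may now run CBs */

    /* De-offloading, the mirror image. */
    kthread_park(rdp->nocb_cb_kthread);     /* quiesce rcuo first */
    rcu_nocb_queue_toggle_rdp(rdp);         /* ask rcuog to drop rdp */
    swait_event_exclusive(rdp->nocb_state_wq,
                          rcu_nocb_rdp_deoffload_wait_cond(rdp));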

Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/rcu_segcblist.h | 4 +-
kernel/rcu/rcu_segcblist.c | 11 ---
kernel/rcu/rcu_segcblist.h | 2 +-
kernel/rcu/tree_nocb.h | 129 ++++++++++++++++------------------
4 files changed, 61 insertions(+), 85 deletions(-)

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 1ef1bb54853d..2fdc2208f1ca 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -185,9 +185,7 @@ struct rcu_cblist {
* ----------------------------------------------------------------------------
*/
#define SEGCBLIST_ENABLED BIT(0)
-#define SEGCBLIST_LOCKING BIT(1)
-#define SEGCBLIST_KTHREAD_GP BIT(2)
-#define SEGCBLIST_OFFLOADED BIT(3)
+#define SEGCBLIST_OFFLOADED BIT(1)

struct rcu_segcblist {
struct rcu_head *head;
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 1693ea22ef1b..298a2c573f02 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -260,17 +260,6 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
rcu_segcblist_clear_flags(rsclp, SEGCBLIST_ENABLED);
}

-/*
- * Mark the specified rcu_segcblist structure as offloaded (or not)
- */
-void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
-{
- if (offload)
- rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED);
- else
- rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
-}
-
/*
* Does the specified rcu_segcblist structure contain callbacks that
* are ready to be invoked?
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 7a0962dfee86..259904075636 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -89,7 +89,7 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
{
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
- rcu_segcblist_test_flags(rsclp, SEGCBLIST_LOCKING))
+ rcu_segcblist_test_flags(rsclp, SEGCBLIST_OFFLOADED))
return true;

return false;
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 24daf606de0c..72a2990d2087 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -604,37 +604,33 @@ static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
}
}

-static int nocb_gp_toggle_rdp(struct rcu_data *rdp)
+static void nocb_gp_toggle_rdp(struct rcu_data *rdp_gp, struct rcu_data *rdp)
{
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
- int ret;

- rcu_nocb_lock_irqsave(rdp, flags);
- if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED) &&
- !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
+ /*
+ * Locking orders future de-offloaded callbacks enqueue against previous
+ * handling of this rdp. Ie: Make sure rcuog is done with this rdp before
+ * deoffloaded callbacks can be enqueued.
+ */
+ raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+ if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
/*
* Offloading. Set our flag and notify the offload worker.
* We will handle this rdp until it ever gets de-offloaded.
*/
- rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP);
- ret = 1;
- } else if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED) &&
- rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
+ list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
+ rcu_segcblist_set_flags(cblist, SEGCBLIST_OFFLOADED);
+ } else {
/*
* De-offloading. Clear our flag and notify the de-offload worker.
* We will ignore this rdp until it ever gets re-offloaded.
*/
- rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP);
- ret = 0;
- } else {
- WARN_ON_ONCE(1);
- ret = -1;
+ list_del(&rdp->nocb_entry_rdp);
+ rcu_segcblist_clear_flags(cblist, SEGCBLIST_OFFLOADED);
}
-
- rcu_nocb_unlock_irqrestore(rdp, flags);
-
- return ret;
+ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
}

static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
@@ -841,14 +837,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
}

if (rdp_toggling) {
- int ret;
-
- ret = nocb_gp_toggle_rdp(rdp_toggling);
- if (ret == 1)
- list_add_tail(&rdp_toggling->nocb_entry_rdp, &my_rdp->nocb_head_rdp);
- else if (ret == 0)
- list_del(&rdp_toggling->nocb_entry_rdp);
-
+ nocb_gp_toggle_rdp(my_rdp, rdp_toggling);
swake_up_one(&rdp_toggling->nocb_state_wq);
}

@@ -1018,16 +1007,11 @@ void rcu_nocb_flush_deferred_wakeup(void)
}
EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);

-static int rdp_offload_toggle(struct rcu_data *rdp,
- bool offload, unsigned long flags)
- __releases(rdp->nocb_lock)
+static int rcu_nocb_queue_toggle_rdp(struct rcu_data *rdp)
{
- struct rcu_segcblist *cblist = &rdp->cblist;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
bool wake_gp = false;
-
- rcu_segcblist_offload(cblist, offload);
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ unsigned long flags;

raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
// Queue this rdp for add/del to/from the list to iterate on rcuog
@@ -1041,9 +1025,25 @@ static int rdp_offload_toggle(struct rcu_data *rdp,
return wake_gp;
}

+static bool rcu_nocb_rdp_deoffload_wait_cond(struct rcu_data *rdp)
+{
+ unsigned long flags;
+ bool ret;
+
+ /*
+ * Locking makes sure rcuog is done handling this rdp before deoffloaded
+ * enqueue can happen. Also it keeps the SEGCBLIST_OFFLOADED flag stable
+ * while the ->nocb_lock is held.
+ */
+ raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+ ret = !rcu_segcblist_test_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
+ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+
+ return ret;
+}
+
static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
{
- struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
int wake_gp;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
@@ -1059,19 +1059,20 @@ static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
rcu_nocb_lock_irqsave(rdp, flags);
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+ rcu_nocb_unlock_irqrestore(rdp, flags);

- wake_gp = rdp_offload_toggle(rdp, false, flags);
+ wake_gp = rcu_nocb_queue_toggle_rdp(rdp);

mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
if (rdp_gp->nocb_gp_kthread) {
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);

- swait_event_exclusive(rdp->nocb_state_wq,
- !rcu_segcblist_test_flags(cblist,
- SEGCBLIST_KTHREAD_GP));
if (rdp->nocb_cb_kthread)
kthread_park(rdp->nocb_cb_kthread);
+
+ swait_event_exclusive(rdp->nocb_state_wq,
+ rcu_nocb_rdp_deoffload_wait_cond(rdp));
} else {
/*
* No kthread to clear the flags for us or remove the rdp from the nocb list
@@ -1079,29 +1080,14 @@ static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
* but we stick to paranoia in this rare path.
*/
rcu_nocb_lock_irqsave(rdp, flags);
- rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
- rcu_nocb_unlock_irqrestore(rdp, flags);
+ rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
+ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

list_del(&rdp->nocb_entry_rdp);
}
+
mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);

- /*
- * Lock one last time to acquire latest callback updates from kthreads
- * so we can later handle callbacks locally without locking.
- */
- rcu_nocb_lock_irqsave(rdp, flags);
- /*
- * Theoretically we could clear SEGCBLIST_LOCKING after the nocb
- * lock is released but how about being paranoid for once?
- */
- rcu_segcblist_clear_flags(cblist, SEGCBLIST_LOCKING);
- /*
- * Without SEGCBLIST_LOCKING, we can't use
- * rcu_nocb_unlock_irqrestore() anymore.
- */
- raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-
return 0;
}

@@ -1129,10 +1115,20 @@ int rcu_nocb_cpu_deoffload(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);

+static bool rcu_nocb_rdp_offload_wait_cond(struct rcu_data *rdp)
+{
+ unsigned long flags;
+ bool ret;
+
+ raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+ ret = rcu_segcblist_test_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
+ raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+
+ return ret;
+}
+
static int rcu_nocb_rdp_offload(struct rcu_data *rdp)
{
- struct rcu_segcblist *cblist = &rdp->cblist;
- unsigned long flags;
int wake_gp;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

@@ -1152,20 +1148,14 @@ static int rcu_nocb_rdp_offload(struct rcu_data *rdp)
WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));

- /*
- * Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING
- * is set.
- */
- raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
-
- wake_gp = rdp_offload_toggle(rdp, true, flags);
+ wake_gp = rcu_nocb_queue_toggle_rdp(rdp);
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);

- kthread_unpark(rdp->nocb_cb_kthread);
-
swait_event_exclusive(rdp->nocb_state_wq,
- rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
+ rcu_nocb_rdp_offload_wait_cond(rdp));
+
+ kthread_unpark(rdp->nocb_cb_kthread);

return 0;
}
@@ -1340,8 +1330,7 @@ void __init rcu_init_nohz(void)
rdp = per_cpu_ptr(&rcu_data, cpu);
if (rcu_segcblist_empty(&rdp->cblist))
rcu_segcblist_init(&rdp->cblist);
- rcu_segcblist_offload(&rdp->cblist, true);
- rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
+ rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
}
rcu_organize_nocb_kthreads();
}
--
2.45.1


2024-05-30 13:56:21

by Frederic Weisbecker

Subject: [PATCH 10/11] rcu/nocb: Remove SEGCBLIST_KTHREAD_CB

This bit of the (de-)offloading state machine was used to implement
ad-hoc parking of rcuo kthreads. That code has since been removed, so
the related state can be erased as well.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/rcu_segcblist.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 5469c54cd778..1ef1bb54853d 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -186,9 +186,8 @@ struct rcu_cblist {
*/
#define SEGCBLIST_ENABLED BIT(0)
#define SEGCBLIST_LOCKING BIT(1)
-#define SEGCBLIST_KTHREAD_CB BIT(2)
-#define SEGCBLIST_KTHREAD_GP BIT(3)
-#define SEGCBLIST_OFFLOADED BIT(4)
+#define SEGCBLIST_KTHREAD_GP BIT(2)
+#define SEGCBLIST_OFFLOADED BIT(3)

struct rcu_segcblist {
struct rcu_head *head;
--
2.45.1


2024-05-30 13:57:17

by Frederic Weisbecker

Subject: [PATCH 05/11] rcu/nocb: (De-)offload callbacks on offline CPUs only

Currently callbacks can be (de-)offloaded only on online CPUs. This
involves an overly elaborate state machine in order to make sure that
callbacks are always handled during the process, while ensuring
synchronization between rcu_core() and the NOCB kthreads.

The only potential user of NOCB (de-)offloading appears to be a
nohz_full toggling interface through cpusets. And the general agreement
is now to work toward toggling the nohz_full state on offline CPUs to
simplify the whole picture.

Therefore, convert the (de-)offloading to only support offline CPUs.
This involves the following changes:

* Call rcu_barrier() before deoffloading. An offline offloaded CPU may
still carry callbacks in its queue that were ignored by
rcutree_migrate_callbacks(). Those callbacks must all be flushed
before switching to a regular queue, because no kthread will handle
them until the CPU ever gets re-onlined.

This means that further calls to rcu_barrier() will find an empty
queue until the CPU goes through rcutree_report_cpu_starting(). As a
result it is guaranteed that further rcu_barrier() won't try to lock
the nocb_lock for that target and thus won't risk an imbalance.

Therefore barrier_mutex doesn't need to be locked anymore upon
deoffloading.

* Assume the queue is empty before offloading, as
rcutree_migrate_callbacks() took care of everything.

This means that further calls to rcu_barrier() will find an empty
queue until the CPU goes through rcutree_report_cpu_starting(). As a
result it is guaranteed that further rcu_barrier() won't risk a
nocb_lock imbalance.

Therefore barrier_mutex doesn't need to be locked anymore upon
offloading.

* No need to flush bypass anymore.

Further simplifications will follow in upcoming patches.
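
For illustration, a hypothetical caller of the new scheme would be
expected to take the CPU offline first (sketch only, not part of this
series; remove_cpu()/add_cpu() stand in for whatever hotplug path the
future cpusets interface ends up using):

    /* Hypothetical usage sketch. */
    static int deoffload_one_cpu(int cpu)
    {
            int ret = remove_cpu(cpu);      /* CPU must be offline */

            if (ret)
                    return ret;
            ret = rcu_nocb_cpu_deoffload(cpu);
            WARN_ON_ONCE(add_cpu(cpu));     /* bring the CPU back up */
            return ret;
    }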

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/rcu/tree_nocb.h | 82 +++++++++++-------------------------------
1 file changed, 21 insertions(+), 61 deletions(-)

diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 16bcb8b13a5e..8e766396df3a 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1049,43 +1049,26 @@ static int rdp_offload_toggle(struct rcu_data *rdp,
return wake_gp;
}

-static long rcu_nocb_rdp_deoffload(void *arg)
+static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
{
- struct rcu_data *rdp = arg;
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
int wake_gp;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

- /*
- * rcu_nocb_rdp_deoffload() may be called directly if
- * rcuog/o[p] spawn failed, because at this time the rdp->cpu
- * is not online yet.
- */
- WARN_ON_ONCE((rdp->cpu != raw_smp_processor_id()) && cpu_online(rdp->cpu));
+ /* CPU must be offline, unless it's early boot */
+ WARN_ON_ONCE(cpu_online(rdp->cpu) && rdp->cpu != raw_smp_processor_id());

pr_info("De-offloading %d\n", rdp->cpu);

+ /* Flush all callbacks from segcblist and bypass */
+ rcu_barrier();
+
rcu_nocb_lock_irqsave(rdp, flags);
- /*
- * Flush once and for all now. This suffices because we are
- * running on the target CPU holding ->nocb_lock (thus having
- * interrupts disabled), and because rdp_offload_toggle()
- * invokes rcu_segcblist_offload(), which clears SEGCBLIST_OFFLOADED.
- * Thus future calls to rcu_segcblist_completely_offloaded() will
- * return false, which means that future calls to rcu_nocb_try_bypass()
- * will refuse to put anything into the bypass.
- */
- WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
- /*
- * Start with invoking rcu_core() early. This way if the current thread
- * happens to preempt an ongoing call to rcu_core() in the middle,
- * leaving some work dismissed because rcu_core() still thinks the rdp is
- * completely offloaded, we are guaranteed a nearby future instance of
- * rcu_core() to catch up.
- */
+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
+ WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+
rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
- invoke_rcu_core();
wake_gp = rdp_offload_toggle(rdp, false, flags);

mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);
@@ -1128,10 +1111,6 @@ static long rcu_nocb_rdp_deoffload(void *arg)
*/
raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

- /* Sanity check */
- WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
-
-
return 0;
}

@@ -1142,18 +1121,16 @@ int rcu_nocb_cpu_deoffload(int cpu)

cpus_read_lock();
mutex_lock(&rcu_state.nocb_mutex);
- mutex_lock(&rcu_state.barrier_mutex);
if (rcu_rdp_is_offloaded(rdp)) {
- if (cpu_online(cpu)) {
- ret = work_on_cpu(cpu, rcu_nocb_rdp_deoffload, rdp);
+ if (!cpu_online(cpu)) {
+ ret = rcu_nocb_rdp_deoffload(rdp);
if (!ret)
cpumask_clear_cpu(cpu, rcu_nocb_mask);
} else {
- pr_info("NOCB: Cannot CB-deoffload offline CPU %d\n", rdp->cpu);
+ pr_info("NOCB: Cannot CB-deoffload online CPU %d\n", rdp->cpu);
ret = -EINVAL;
}
}
- mutex_unlock(&rcu_state.barrier_mutex);
mutex_unlock(&rcu_state.nocb_mutex);
cpus_read_unlock();

@@ -1161,15 +1138,14 @@ int rcu_nocb_cpu_deoffload(int cpu)
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);

-static long rcu_nocb_rdp_offload(void *arg)
+static int rcu_nocb_rdp_offload(struct rcu_data *rdp)
{
- struct rcu_data *rdp = arg;
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
int wake_gp;
struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

- WARN_ON_ONCE(rdp->cpu != raw_smp_processor_id());
+ WARN_ON_ONCE(cpu_online(rdp->cpu));
/*
* For now we only support re-offload, ie: the rdp must have been
* offloaded on boot first.
@@ -1182,28 +1158,15 @@ static long rcu_nocb_rdp_offload(void *arg)

pr_info("Offloading %d\n", rdp->cpu);

+ WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
+ WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
+
/*
* Can't use rcu_nocb_lock_irqsave() before SEGCBLIST_LOCKING
* is set.
*/
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);

- /*
- * We didn't take the nocb lock while working on the
- * rdp->cblist with SEGCBLIST_LOCKING cleared (pure softirq/rcuc mode).
- * Every modifications that have been done previously on
- * rdp->cblist must be visible remotely by the nocb kthreads
- * upon wake up after reading the cblist flags.
- *
- * The layout against nocb_lock enforces that ordering:
- *
- * __rcu_nocb_rdp_offload() nocb_cb_wait()/nocb_gp_wait()
- * ------------------------- ----------------------------
- * WRITE callbacks rcu_nocb_lock()
- * rcu_nocb_lock() READ flags
- * WRITE flags READ callbacks
- * rcu_nocb_unlock() rcu_nocb_unlock()
- */
wake_gp = rdp_offload_toggle(rdp, true, flags);
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);
@@ -1214,8 +1177,7 @@ static long rcu_nocb_rdp_offload(void *arg)
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));

/*
- * All kthreads are ready to work, we can finally relieve rcu_core() and
- * enable nocb bypass.
+ * All kthreads are ready to work, we can finally enable nocb bypass.
*/
rcu_nocb_lock_irqsave(rdp, flags);
rcu_segcblist_clear_flags(cblist, SEGCBLIST_RCU_CORE);
@@ -1231,18 +1193,16 @@ int rcu_nocb_cpu_offload(int cpu)

cpus_read_lock();
mutex_lock(&rcu_state.nocb_mutex);
- mutex_lock(&rcu_state.barrier_mutex);
if (!rcu_rdp_is_offloaded(rdp)) {
- if (cpu_online(cpu)) {
- ret = work_on_cpu(cpu, rcu_nocb_rdp_offload, rdp);
+ if (!cpu_online(cpu)) {
+ ret = rcu_nocb_rdp_offload(rdp);
if (!ret)
cpumask_set_cpu(cpu, rcu_nocb_mask);
} else {
- pr_info("NOCB: Cannot CB-offload offline CPU %d\n", rdp->cpu);
+ pr_info("NOCB: Cannot CB-offload online CPU %d\n", rdp->cpu);
ret = -EINVAL;
}
}
- mutex_unlock(&rcu_state.barrier_mutex);
mutex_unlock(&rcu_state.nocb_mutex);
cpus_read_unlock();

--
2.45.1


2024-05-30 13:58:10

by Frederic Weisbecker

Subject: [PATCH 07/11] rcu/nocb: Remove halfway (de-)offloading handling from rcu_core()'s QS reporting

RCU core can't be running anymore while in the middle of (de-)offloading
since this sort of transition now only applies to offline CPUs.

The locked callback acceleration handling during the transition can
therefore be removed.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/rcu/tree.c | 13 -------------
1 file changed, 13 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e904c187c281..c502a38dc5f3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2381,7 +2381,6 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
{
unsigned long flags;
unsigned long mask;
- bool needacc = false;
struct rcu_node *rnp;

WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2418,23 +2417,11 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
* to return true. So complain, but don't awaken.
*/
WARN_ON_ONCE(rcu_accelerate_cbs(rnp, rdp));
- } else if (!rcu_segcblist_completely_offloaded(&rdp->cblist)) {
- /*
- * ...but NOCB kthreads may miss or delay callbacks acceleration
- * if in the middle of a (de-)offloading process.
- */
- needacc = true;
}

rcu_disable_urgency_upon_qs(rdp);
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
/* ^^^ Released rnp->lock */
-
- if (needacc) {
- rcu_nocb_lock_irqsave(rdp, flags);
- rcu_accelerate_cbs_unlocked(rnp, rdp);
- rcu_nocb_unlock_irqrestore(rdp, flags);
- }
}
}

--
2.45.1