From: Lai Jiangshan <[email protected]>
X86 can access percpu data in a single instruction.
Use per-cpu rcu preempt count and make it able to be inlined.
patch 1-7: prepare
patch 8-10: implement PCPU_RCU_PREEMPT_COUNT
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Lai Jiangshan (10):
lib: Use rcu_preempt_depth() to replace current->rcu_read_lock_nesting
rcu: Move rcu_preempt_depth_set() to rcupdate.h
rcu: Reorder tree_exp.h after tree_plugin.h
rcu: Add macros set_rcu_preempt_special() and
clear_rcu_preempt_special()
rcu: Make rcu_read_unlock_special() global
rcu: Rename marco __LINUX_RCU_H to __KERNEL_RCU_H
sched/core: Add rcu_preempt_switch()
rcu: Implement PCPU_RCU_PREEMPT_COUNT framework
x86/rcu: Add rcu_preempt_count
x86/rcu: Add THUNK rcu_read_unlock_special_thunk
arch/x86/Kconfig | 1 +
arch/x86/entry/thunk_64.S | 5 ++
arch/x86/include/asm/current.h | 3 +
arch/x86/include/asm/rcu_preempt.h | 109 +++++++++++++++++++++++++++++
arch/x86/kernel/cpu/common.c | 7 +-
include/linux/rcupdate.h | 36 ++++++++++
kernel/rcu/Kconfig | 8 +++
kernel/rcu/rcu.h | 15 +++-
kernel/rcu/tree.c | 2 +-
kernel/rcu/tree_exp.h | 2 +-
kernel/rcu/tree_plugin.h | 41 +++++++----
kernel/sched/core.c | 2 +
lib/locking-selftest.c | 6 +-
13 files changed, 211 insertions(+), 26 deletions(-)
create mode 100644 arch/x86/include/asm/rcu_preempt.h
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
Use the general wrapper rcu_preempt_depth() instead of the open code.
Prepare for enabling per-cpu rcu_preempt_count, in which case
current->rcu_read_lock_nesting might not be synced with the real depth.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
lib/locking-selftest.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 6f6a5fc85b42..9bb41fb18088 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1434,7 +1434,7 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
#ifdef CONFIG_SMP
int saved_mgd_count = current->migration_disabled;
#endif
- int saved_rcu_count = current->rcu_read_lock_nesting;
+ int saved_rcu_count = rcu_preempt_depth();
#endif
WARN_ON(irqs_disabled());
@@ -1476,9 +1476,9 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
migrate_enable();
#endif
- while (current->rcu_read_lock_nesting > saved_rcu_count)
+ while (rcu_preempt_depth() > saved_rcu_count)
rcu_read_unlock();
- WARN_ON_ONCE(current->rcu_read_lock_nesting < saved_rcu_count);
+ WARN_ON_ONCE(rcu_preempt_depth() < saved_rcu_count);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
Prepare for arch-specific-defined rcu_preempt_depth_set().
No functionality change intended, but it has to be defined as a macro
as rcupdate.h is a very low level header included from areas that don't
even know about the task struct "current".
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
include/linux/rcupdate.h | 1 +
kernel/rcu/tree_plugin.h | 5 -----
2 files changed, 1 insertion(+), 5 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 16f519914415..210f65baf47c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -78,6 +78,7 @@ void __rcu_read_unlock(void);
* types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
*/
#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
+#define rcu_preempt_depth_set(val) WRITE_ONCE(current->rcu_read_lock_nesting, (val))
#else /* #ifdef CONFIG_PREEMPT_RCU */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 36a8b5dbf5b5..b1264096d03a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -387,11 +387,6 @@ static int rcu_preempt_read_exit(void)
return ret;
}
-static void rcu_preempt_depth_set(int val)
-{
- WRITE_ONCE(current->rcu_read_lock_nesting, val);
-}
-
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
Enable tree_exp.h using some rcu_preempt macros introduced in
the next patch. The new macros touch core rcu_preempt fields
and are better to be implemented in tree_plugin.h.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
kernel/rcu/tree.c | 2 +-
kernel/rcu/tree_plugin.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d9642dd06c25..57d1ae26861f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -5245,6 +5245,6 @@ void __init rcu_init(void)
}
#include "tree_stall.h"
-#include "tree_exp.h"
#include "tree_nocb.h"
#include "tree_plugin.h"
+#include "tree_exp.h"
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index b1264096d03a..d899b4afc21c 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -103,6 +103,7 @@ static void __init rcu_bootup_announce_oddness(void)
#ifdef CONFIG_PREEMPT_RCU
static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
+static bool sync_rcu_exp_done(struct rcu_node *rnp);
static void rcu_read_unlock_special(struct task_struct *t);
/*
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
Add wrappers accessing to t->rcu_read_unlock_special.b so that the
wrappers can be adapted for synching the value with the later-introduced
per-cpu rcu_preempt_count.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
kernel/rcu/tree_exp.h | 2 +-
kernel/rcu/tree_plugin.h | 20 +++++++++++++-------
2 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6b83537480b1..4e91414552e6 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -725,7 +725,7 @@ static void rcu_exp_handler(void *unused)
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->expmask & rdp->grpmask) {
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
- t->rcu_read_unlock_special.b.exp_hint = true;
+ set_rcu_preempt_special(exp_hint);
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index d899b4afc21c..3dbd5609185e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -106,6 +106,14 @@ static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
static bool sync_rcu_exp_done(struct rcu_node *rnp);
static void rcu_read_unlock_special(struct task_struct *t);
+#define set_rcu_preempt_special(reason) do { \
+ WRITE_ONCE(current->rcu_read_unlock_special.b.reason, true); \
+ } while (0)
+
+#define clear_rcu_preempt_special(reason) do { \
+ WRITE_ONCE(current->rcu_read_unlock_special.b.reason, false); \
+ } while (0)
+
/*
* Tell them what RCU they are running.
*/
@@ -293,7 +301,7 @@ static void rcu_qs(void)
TPS("cpuqs"));
__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
- WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, false);
+ clear_rcu_preempt_special(need_qs);
}
}
@@ -325,7 +333,7 @@ void rcu_note_context_switch(bool preempt)
/* Possibly blocking in an RCU read-side critical section. */
rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp);
- t->rcu_read_unlock_special.b.blocked = true;
+ set_rcu_preempt_special(blocked);
t->rcu_blocked_node = rnp;
/*
@@ -399,7 +407,7 @@ void __rcu_read_lock(void)
if (IS_ENABLED(CONFIG_PROVE_LOCKING))
WARN_ON_ONCE(rcu_preempt_depth() > RCU_NEST_PMAX);
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && rcu_state.gp_kthread)
- WRITE_ONCE(current->rcu_read_unlock_special.b.need_qs, true);
+ set_rcu_preempt_special(need_qs);
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
@@ -738,7 +746,7 @@ static void rcu_flavor_sched_clock_irq(int user)
__this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
!t->rcu_read_unlock_special.b.need_qs &&
time_after(jiffies, rcu_state.gp_start + HZ))
- t->rcu_read_unlock_special.b.need_qs = true;
+ set_rcu_preempt_special(need_qs);
}
/*
@@ -751,12 +759,10 @@ static void rcu_flavor_sched_clock_irq(int user)
*/
void exit_rcu(void)
{
- struct task_struct *t = current;
-
if (unlikely(!list_empty(¤t->rcu_node_entry))) {
rcu_preempt_depth_set(1);
barrier();
- WRITE_ONCE(t->rcu_read_unlock_special.b.blocked, true);
+ set_rcu_preempt_special(blocked);
} else if (unlikely(rcu_preempt_depth())) {
rcu_preempt_depth_set(1);
} else {
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
to reflect the path of kernel/rcu/rcu.h.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
kernel/rcu/rcu.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 86fce206560e..b17b2ed657fc 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -7,8 +7,8 @@
* Author: Paul E. McKenney <[email protected]>
*/
-#ifndef __LINUX_RCU_H
-#define __LINUX_RCU_H
+#ifndef __KERNEL_RCU_H
+#define __KERNEL_RCU_H
#include <linux/slab.h>
#include <trace/events/rcu.h>
@@ -668,4 +668,4 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v);
static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; }
#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
-#endif /* __LINUX_RCU_H */
+#endif /* __KERNEL_RCU_H */
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
When the per-cpu rcu_preempt_count is used, it has to be switched too
on context-switching. And the instructions to switch the per-cpu
rcu_preempt_count are meager, so it is inlined to avoid the overhead
in the scheduler.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
kernel/rcu/rcu.h | 5 +++++
kernel/sched/core.c | 2 ++
2 files changed, 7 insertions(+)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index b17b2ed657fc..ea5ae957c687 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -668,4 +668,9 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v);
static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; }
#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
+static inline void
+rcu_preempt_switch(struct task_struct *prev, struct task_struct *next)
+{
+}
+
#endif /* __KERNEL_RCU_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d44efa0d0611..7c3fedd9180c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -95,6 +95,7 @@
#include "../workqueue_internal.h"
#include "../../io_uring/io-wq.h"
#include "../smpboot.h"
+#include "../rcu/rcu.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
@@ -6727,6 +6728,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
*/
++*switch_count;
+ rcu_preempt_switch(prev, next);
migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
When the arch code provides HAVE_PCPU_RCU_PREEMPT_COUNT and the
corresponding functions, rcu_preempt core uses the functions to
implement rcu_read_[un]lock, rcu_preempt_depth(), special bits,
switching and so on.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
include/linux/rcupdate.h | 33 +++++++++++++++++++++++++++++++++
kernel/rcu/Kconfig | 8 ++++++++
kernel/rcu/rcu.h | 4 ++++
kernel/rcu/tree_plugin.h | 8 ++++++++
4 files changed, 53 insertions(+)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index cc77d76a870b..bf369741ef93 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -70,6 +70,8 @@ static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned
void rcu_read_unlock_special(void);
+#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
void __rcu_read_lock(void);
void __rcu_read_unlock(void);
@@ -81,6 +83,37 @@ void __rcu_read_unlock(void);
*/
#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
#define rcu_preempt_depth_set(val) WRITE_ONCE(current->rcu_read_lock_nesting, (val))
+#define pcpu_rcu_preempt_special_set() do { } while (0)
+#define pcpu_rcu_preempt_special_clear() do { } while (0)
+
+#else /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
+
+#include <asm/rcu_preempt.h>
+
+static __always_inline void __rcu_read_lock(void)
+{
+ pcpu_rcu_preempt_count_add(1);
+ barrier();
+}
+
+static __always_inline void __rcu_read_unlock(void)
+{
+ barrier();
+ if (unlikely(pcpu_rcu_preempt_count_dec_and_test()))
+ pcpu_rcu_read_unlock_special();
+}
+
+static inline int rcu_preempt_depth(void)
+{
+ return pcpu_rcu_preempt_count();
+}
+
+static inline void rcu_preempt_depth_set(int val)
+{
+ pcpu_rcu_preempt_count_set(val);
+}
+
+#endif /* #else #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
#else /* #ifdef CONFIG_PREEMPT_RCU */
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index e7d2dd267593..5d91147bc9a3 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -345,4 +345,12 @@ config RCU_DOUBLE_CHECK_CB_TIME
Say Y here if you need tighter callback-limit enforcement.
Say N here if you are unsure.
+config HAVE_PCPU_RCU_PREEMPT_COUNT
+ bool
+
+config PCPU_RCU_PREEMPT_COUNT
+ def_bool y
+ depends on PREEMPT_RCU && HAVE_PCPU_RCU_PREEMPT_COUNT
+ depends on !PROVE_LOCKING && !RCU_STRICT_GRACE_PERIOD
+
endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index ea5ae957c687..2322b040c5cd 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -671,6 +671,10 @@ static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { re
static inline void
rcu_preempt_switch(struct task_struct *prev, struct task_struct *next)
{
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+ prev->rcu_read_lock_nesting = rcu_preempt_depth();
+ pcpu_rcu_preempt_switch(next->rcu_read_lock_nesting, next->rcu_read_unlock_special.s);
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
}
#endif /* __KERNEL_RCU_H */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 880b3fef1158..db68d0c1c1f2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -107,10 +107,13 @@ static bool sync_rcu_exp_done(struct rcu_node *rnp);
#define set_rcu_preempt_special(reason) do { \
WRITE_ONCE(current->rcu_read_unlock_special.b.reason, true); \
+ pcpu_rcu_preempt_special_set(); \
} while (0)
#define clear_rcu_preempt_special(reason) do { \
WRITE_ONCE(current->rcu_read_unlock_special.b.reason, false); \
+ if (!current->rcu_read_unlock_special.s) \
+ pcpu_rcu_preempt_special_clear(); \
} while (0)
/*
@@ -379,6 +382,8 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
return READ_ONCE(rnp->gp_tasks) != NULL;
}
+#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
/* limit value for ->rcu_read_lock_nesting. */
#define RCU_NEST_PMAX (INT_MAX / 2)
@@ -436,6 +441,8 @@ void __rcu_read_unlock(void)
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+#endif /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
+
/*
* Advance a ->blkd_tasks-list pointer to the next entry, instead
* returning NULL if at the end of the list.
@@ -489,6 +496,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
return;
}
t->rcu_read_unlock_special.s = 0;
+ pcpu_rcu_preempt_special_clear();
if (special.b.need_qs) {
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
rdp->cpu_no_qs.b.norm = false;
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
Add rcu_read_unlock_special_thunk(), so that the inlined rcu_read_unlock()
doesn't need any code to save the caller-saved registers.
Make rcu_read_unlock() only two instructions in the slow path at the
caller site.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
arch/x86/entry/thunk_64.S | 5 +++++
arch/x86/include/asm/rcu_preempt.h | 4 +++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index 119ebdc3d362..10c60369a67c 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -13,3 +13,8 @@ THUNK preempt_schedule_thunk, preempt_schedule
THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
EXPORT_SYMBOL(preempt_schedule_thunk)
EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
+
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+THUNK rcu_read_unlock_special_thunk, rcu_read_unlock_special
+EXPORT_SYMBOL_GPL(rcu_read_unlock_special_thunk)
+#endif /* #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT */
diff --git a/arch/x86/include/asm/rcu_preempt.h b/arch/x86/include/asm/rcu_preempt.h
index cb25ebe038a5..acdd73b74c05 100644
--- a/arch/x86/include/asm/rcu_preempt.h
+++ b/arch/x86/include/asm/rcu_preempt.h
@@ -97,9 +97,11 @@ static __always_inline bool pcpu_rcu_preempt_count_dec_and_test(void)
__percpu_arg([var]));
}
+extern asmlinkage void rcu_read_unlock_special_thunk(void);
+
#define pcpu_rcu_read_unlock_special() \
do { \
- rcu_read_unlock_special(); \
+ asm volatile ("call rcu_read_unlock_special_thunk" : ASM_CALL_CONSTRAINT);\
} while (0)
#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
Make it global so that it can be used in the future inlined rcu_read_unlock().
Make it exported so that the inlined rcu_read_unlock() can be used in modules.
Make it taking a void-argument so that the caller can reduce instructions
obtaining the "current" task value and it can be used in THUNK later.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
include/linux/rcupdate.h | 2 ++
kernel/rcu/tree_plugin.h | 7 ++++---
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 210f65baf47c..cc77d76a870b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -68,6 +68,8 @@ static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned
#ifdef CONFIG_PREEMPT_RCU
+void rcu_read_unlock_special(void);
+
void __rcu_read_lock(void);
void __rcu_read_unlock(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3dbd5609185e..880b3fef1158 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -104,7 +104,6 @@ static void __init rcu_bootup_announce_oddness(void)
static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
static bool sync_rcu_exp_done(struct rcu_node *rnp);
-static void rcu_read_unlock_special(struct task_struct *t);
#define set_rcu_preempt_special(reason) do { \
WRITE_ONCE(current->rcu_read_unlock_special.b.reason, true); \
@@ -427,7 +426,7 @@ void __rcu_read_unlock(void)
if (rcu_preempt_read_exit() == 0) {
barrier(); // critical-section exit before .s check.
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
- rcu_read_unlock_special(t);
+ rcu_read_unlock_special();
}
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
int rrln = rcu_preempt_depth();
@@ -627,8 +626,9 @@ static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
* notify RCU core processing or task having blocked during the RCU
* read-side critical section.
*/
-static void rcu_read_unlock_special(struct task_struct *t)
+void rcu_read_unlock_special(void)
{
+ struct task_struct *t = current;
unsigned long flags;
bool irqs_were_disabled;
bool preempt_bh_were_disabled =
@@ -684,6 +684,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
}
rcu_preempt_deferred_qs_irqrestore(t, flags);
}
+EXPORT_SYMBOL_GPL(rcu_read_unlock_special);
/*
* Check that the list of blocked tasks for the newly completed grace
--
2.19.1.6.gb485710b
From: Lai Jiangshan <[email protected]>
Implement PCPU_RCU_PREEMPT_COUNT for x86.
Mainly copied from asm/preempt.h
Make rcu_read_[un]lock() inlined for rcu-preempt.
Make rcu_read_lock() only one instruction.
Make rcu_read_unlock() only two instructions in the fast path.
Cc: "Paul E. McKenney" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Signed-off-by: Lai Jiangshan <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/current.h | 3 +
arch/x86/include/asm/rcu_preempt.h | 107 +++++++++++++++++++++++++++++
arch/x86/kernel/cpu/common.c | 7 +-
4 files changed, 115 insertions(+), 3 deletions(-)
create mode 100644 arch/x86/include/asm/rcu_preempt.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 78050d5d7fac..7eb17c12f7b7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -257,6 +257,7 @@ config X86
select HAVE_OBJTOOL if X86_64
select HAVE_OPTPROBES
select HAVE_PAGE_SIZE_4KB
+ select HAVE_PCPU_RCU_PREEMPT_COUNT
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index bf5953883ec3..dcc2ef784120 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -24,6 +24,9 @@ struct pcpu_hot {
unsigned long top_of_stack;
void *hardirq_stack_ptr;
u16 softirq_pending;
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+ int rcu_preempt_count;
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
#ifdef CONFIG_X86_64
bool hardirq_stack_inuse;
#else
diff --git a/arch/x86/include/asm/rcu_preempt.h b/arch/x86/include/asm/rcu_preempt.h
new file mode 100644
index 000000000000..cb25ebe038a5
--- /dev/null
+++ b/arch/x86/include/asm/rcu_preempt.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_RCU_PREEMPT_H
+#define __ASM_RCU_PREEMPT_H
+
+#include <asm/rmwcc.h>
+#include <asm/percpu.h>
+#include <asm/current.h>
+
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
+/* We use the MSB mostly because its available */
+#define RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED 0x80000000
+
+/*
+ * We use the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit as an inverted
+ * current->rcu_read_unlock_special.s such that a decrement hitting 0
+ * means we can and should call rcu_read_unlock_special().
+ */
+#define RCU_PREEMPT_INIT (0 + RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED)
+
+/*
+ * We mask the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit so as not to
+ * confuse all current users that think a non-zero value indicates we
+ * are in a critical section.
+ */
+static inline int pcpu_rcu_preempt_count(void)
+{
+ return raw_cpu_read_4(pcpu_hot.rcu_preempt_count) & ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED;
+}
+
+static inline void pcpu_rcu_preempt_count_set(int count)
+{
+ int old, new;
+
+ old = raw_cpu_read_4(pcpu_hot.rcu_preempt_count);
+ do {
+ new = (old & RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED) |
+ (count & ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+ } while (!raw_cpu_try_cmpxchg_4(pcpu_hot.rcu_preempt_count, &old, new));
+}
+
+/*
+ * We fold the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit into the RCU
+ * preempt count such that rcu_read_unlock() can decrement and test for
+ * the need of unlock-special handling with a single instruction.
+ *
+ * We invert the actual bit, so that when the decrement hits 0 we know
+ * we both reach a quiescent state (no rcu preempt count) and need to
+ * handle unlock-special (the bit is cleared), normally to report the
+ * quiescent state immediately.
+ */
+
+static inline void pcpu_rcu_preempt_special_set(void)
+{
+ raw_cpu_and_4(pcpu_hot.rcu_preempt_count, ~RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+}
+
+static inline void pcpu_rcu_preempt_special_clear(void)
+{
+ raw_cpu_or_4(pcpu_hot.rcu_preempt_count, RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+}
+
+static inline bool pcpu_rcu_preempt_special_test(void)
+{
+ return !(raw_cpu_read_4(pcpu_hot.rcu_preempt_count) & RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+}
+
+static inline void pcpu_rcu_preempt_switch(int count, bool special)
+{
+ if (likely(!special))
+ raw_cpu_write(pcpu_hot.rcu_preempt_count, count | RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED);
+ else
+ raw_cpu_write(pcpu_hot.rcu_preempt_count, count);
+}
+
+/*
+ * The various rcu_preempt_count add/sub methods
+ */
+
+static __always_inline void pcpu_rcu_preempt_count_add(int val)
+{
+ raw_cpu_add_4(pcpu_hot.rcu_preempt_count, val);
+}
+
+static __always_inline void pcpu_rcu_preempt_count_sub(int val)
+{
+ raw_cpu_add_4(pcpu_hot.rcu_preempt_count, -val);
+}
+
+/*
+ * Because we keep RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED set when we do
+ * _not_ need to handle unlock-special for a fast-path decrement.
+ */
+static __always_inline bool pcpu_rcu_preempt_count_dec_and_test(void)
+{
+ return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.rcu_preempt_count), e,
+ __percpu_arg([var]));
+}
+
+#define pcpu_rcu_read_unlock_special() \
+do { \
+ rcu_read_unlock_special(); \
+} while (0)
+
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
+#endif /* __ASM_RCU_PREEMPT_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ba8cf5e9ce56..0b204a649442 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1992,9 +1992,10 @@ static __init int setup_clearcpuid(char *arg)
__setup("clearcpuid=", setup_clearcpuid);
DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
- .current_task = &init_task,
- .preempt_count = INIT_PREEMPT_COUNT,
- .top_of_stack = TOP_OF_INIT_STACK,
+ .current_task = &init_task,
+ .preempt_count = INIT_PREEMPT_COUNT,
+ .top_of_stack = TOP_OF_INIT_STACK,
+ .rcu_preempt_count = RCU_PREEMPT_INIT,
};
EXPORT_PER_CPU_SYMBOL(pcpu_hot);
EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
--
2.19.1.6.gb485710b
Hi Lai,
kernel test robot noticed the following build errors:
[auto build test ERROR on paulmck-rcu/dev]
[also build test ERROR on tip/locking/core tip/sched/core tip/x86/asm tip/master linus/master v6.9-rc1 next-20240328]
[cannot apply to tip/x86/core tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Lai-Jiangshan/lib-Use-rcu_preempt_depth-to-replace-current-rcu_read_lock_nesting/20240328-155513
base: https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
patch link: https://lore.kernel.org/r/20240328075318.83039-10-jiangshanlai%40gmail.com
patch subject: [PATCH 09/10] x86/rcu: Add rcu_preempt_count
config: x86_64-randconfig-161-20240328 (https://download.01.org/0day-ci/archive/20240329/[email protected]/config)
compiler: gcc-10 (Ubuntu 10.5.0-1ubuntu1) 10.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All error/warnings (new ones prefixed by >>):
>> arch/x86/kernel/cpu/common.c:1998:3: error: 'struct pcpu_hot' has no member named 'rcu_preempt_count'; did you mean 'preempt_count'?
1998 | .rcu_preempt_count = RCU_PREEMPT_INIT,
| ^~~~~~~~~~~~~~~~~
| preempt_count
>> arch/x86/kernel/cpu/common.c:1998:23: error: 'RCU_PREEMPT_INIT' undeclared here (not in a function); did you mean 'RCUREF_INIT'?
1998 | .rcu_preempt_count = RCU_PREEMPT_INIT,
| ^~~~~~~~~~~~~~~~
| RCUREF_INIT
>> arch/x86/kernel/cpu/common.c:1998:23: warning: excess elements in struct initializer
arch/x86/kernel/cpu/common.c:1998:23: note: (near initialization for 'pcpu_hot')
vim +1998 arch/x86/kernel/cpu/common.c
1993
1994 DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
1995 .current_task = &init_task,
1996 .preempt_count = INIT_PREEMPT_COUNT,
1997 .top_of_stack = TOP_OF_INIT_STACK,
> 1998 .rcu_preempt_count = RCU_PREEMPT_INIT,
1999 };
2000 EXPORT_PER_CPU_SYMBOL(pcpu_hot);
2001 EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
2002
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Lai,
kernel test robot noticed the following build errors:
[auto build test ERROR on paulmck-rcu/dev]
[also build test ERROR on tip/locking/core tip/sched/core tip/x86/asm tip/master linus/master v6.9-rc1 next-20240328]
[cannot apply to tip/x86/core tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Lai-Jiangshan/lib-Use-rcu_preempt_depth-to-replace-current-rcu_read_lock_nesting/20240328-155513
base: https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
patch link: https://lore.kernel.org/r/20240328075318.83039-10-jiangshanlai%40gmail.com
patch subject: [PATCH 09/10] x86/rcu: Add rcu_preempt_count
config: i386-randconfig-141-20240328 (https://download.01.org/0day-ci/archive/20240329/[email protected]/config)
compiler: clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
>> arch/x86/kernel/cpu/common.c:1998:23: error: use of undeclared identifier 'RCU_PREEMPT_INIT'
1998 | .rcu_preempt_count = RCU_PREEMPT_INIT,
| ^
1 error generated.
Kconfig warnings: (for reference only)
WARNING: unmet direct dependencies detected for DRM_I915_DEBUG_GEM
Depends on [n]: HAS_IOMEM [=y] && DRM_I915 [=y] && EXPERT [=y] && DRM_I915_WERROR [=n]
Selected by [y]:
- DRM_I915_DEBUG [=y] && HAS_IOMEM [=y] && DRM_I915 [=y] && EXPERT [=y] && !COMPILE_TEST [=n]
vim +/RCU_PREEMPT_INIT +1998 arch/x86/kernel/cpu/common.c
1993
1994 DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
1995 .current_task = &init_task,
1996 .preempt_count = INIT_PREEMPT_COUNT,
1997 .top_of_stack = TOP_OF_INIT_STACK,
> 1998 .rcu_preempt_count = RCU_PREEMPT_INIT,
1999 };
2000 EXPORT_PER_CPU_SYMBOL(pcpu_hot);
2001 EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
2002
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Lai,
kernel test robot noticed the following build errors:
[auto build test ERROR on paulmck-rcu/dev]
[also build test ERROR on tip/locking/core tip/sched/core tip/x86/asm tip/master linus/master v6.9-rc1 next-20240328]
[cannot apply to tip/x86/core tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Lai-Jiangshan/lib-Use-rcu_preempt_depth-to-replace-current-rcu_read_lock_nesting/20240328-155513
base: https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
patch link: https://lore.kernel.org/r/20240328075318.83039-11-jiangshanlai%40gmail.com
patch subject: [PATCH 10/10] x86/rcu: Add THUNK rcu_read_unlock_special_thunk
config: i386-alldefconfig (https://download.01.org/0day-ci/archive/20240329/[email protected]/config)
compiler: gcc-12 (Ubuntu 12.3.0-9ubuntu2) 12.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240329/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
ld: init/main.o: in function `rest_init':
>> main.c:(.ref.text+0x96): undefined reference to `rcu_read_unlock_special_thunk'
>> ld: main.c:(.ref.text+0x9d): undefined reference to `rcu_read_unlock_special_thunk'
ld: arch/x86/events/intel/ds.o: in function `intel_pmu_drain_bts_buffer':
>> ds.c:(.text+0x2cd9): undefined reference to `rcu_read_unlock_special_thunk'
ld: arch/x86/kernel/nmi.o: in function `nmi_handle':
>> nmi.c:(.text+0x2e9): undefined reference to `rcu_read_unlock_special_thunk'
ld: arch/x86/kernel/alternative.o: in function `__text_poke':
>> alternative.c:(.text+0x531): undefined reference to `rcu_read_unlock_special_thunk'
ld: arch/x86/mm/init.o:init.c:(.init.text+0xf1): more undefined references to `rcu_read_unlock_special_thunk' follow
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
> On Mar 28, 2024, at 1:20 PM, Lai Jiangshan <[email protected]> wrote:
>
> From: Lai Jiangshan <[email protected]>
>
> Prepare for arch-specific-defined rcu_preempt_depth_set().
>
> No functionality change intended, but it has to be defined as a macro
> as rcupdate.h is a very low level header included from areas that don't
> even know about the task struct "current".
Sorry I did not follow changelog. If some rcupdate.h includers do not know
about task_struct, how does adding a macro that uses current help?
Thanks,
- Joel
>
> Cc: "Paul E. McKenney" <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Signed-off-by: Lai Jiangshan <[email protected]>
> ---
> include/linux/rcupdate.h | 1 +
> kernel/rcu/tree_plugin.h | 5 -----
> 2 files changed, 1 insertion(+), 5 deletions(-)
>
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index 16f519914415..210f65baf47c 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -78,6 +78,7 @@ void __rcu_read_unlock(void);
> * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
> */
> #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
> +#define rcu_preempt_depth_set(val) WRITE_ONCE(current->rcu_read_lock_nesting, (val))
>
> #else /* #ifdef CONFIG_PREEMPT_RCU */
>
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index 36a8b5dbf5b5..b1264096d03a 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -387,11 +387,6 @@ static int rcu_preempt_read_exit(void)
> return ret;
> }
>
> -static void rcu_preempt_depth_set(int val)
> -{
> - WRITE_ONCE(current->rcu_read_lock_nesting, val);
> -}
> -
> /*
> * Preemptible RCU implementation for rcu_read_lock().
> * Just increment ->rcu_read_lock_nesting, shared state will be updated
> --
> 2.19.1.6.gb485710b
>
On Sun, Mar 31, 2024 at 7:10 PM Joel Fernandes <[email protected]> wrote:
>
>
>
> > On Mar 28, 2024, at 1:20 PM, Lai Jiangshan <[email protected]> wrote:
> >
> > From: Lai Jiangshan <[email protected]>
> >
> > Prepare for arch-specific-defined rcu_preempt_depth_set().
> >
> > No functionality change intended, but it has to be defined as a macro
> > as rcupdate.h is a very low level header included from areas that don't
> > even know about the task struct "current".
>
> Sorry I did not follow changelog. If some rcupdate.h includers do not know
> about task_struct, how does adding a macro that uses current help?
>
Hello
This is how macro works and it expands blindly based on tokens on the
usage-sites.
And rcu_preempt_depth() & rcu_preempt_depth_set() are not universally
used wrappers, the user can simply also include linux/sched.h to make
they work.
Thanks
Lai
> On Mar 31, 2024, at 9:46 PM, Lai Jiangshan <[email protected]> wrote:
>
> On Sun, Mar 31, 2024 at 7:10 PM Joel Fernandes <[email protected]> wrote:
>>
>>
>>
>>>> On Mar 28, 2024, at 1:20 PM, Lai Jiangshan <[email protected]> wrote:
>>>
>>> From: Lai Jiangshan <[email protected]>
>>>
>>> Prepare for arch-specific-defined rcu_preempt_depth_set().
>>>
>>> No functionality change intended, but it has to be defined as a macro
>>> as rcupdate.h is a very low level header included from areas that don't
>>> even know about the task struct "current".
>>
>> Sorry I did not follow changelog. If some rcupdate.h includers do not know
>> about task_struct, how does adding a macro that uses current help?
>>
>
> Hello
>
> This is how macro works and it expands blindly based on tokens on the
> usage-sites.
>
> And rcu_preempt_depth() & rcu_preempt_depth_set() are not universally
> used wrappers, the user can simply also include linux/sched.h to make
> they work.
Oh I see, so by hiding it in a macro, the code remains unexpanded. That makes sense..
Thanks.
>
> Thanks
> Lai
2024年4月1日 00:16,Lai Jiangshan <[email protected]> wrote:
>
> On Sun, Mar 31, 2024 at 7:10 PM Joel Fernandes <[email protected]> wrote:
>>
>>
>>
>>> On Mar 28, 2024, at 1:20 PM, Lai Jiangshan <[email protected]> wrote:
>>>
>>> From: Lai Jiangshan <[email protected]>
>>>
>>> Prepare for arch-specific-defined rcu_preempt_depth_set().
>>>
>>> No functionality change intended, but it has to be defined as a macro
>>> as rcupdate.h is a very low level header included from areas that don't
>>> even know about the task struct "current".
>>
>> Sorry I did not follow changelog. If some rcupdate.h includers do not know
>> about task_struct, how does adding a macro that uses current help?
>>
>
> Hello
>
> This is how macro works and it expands blindly based on tokens on the
> usage-sites.
But ‘current’ still needs to be expanded at last, it seems to me that it only affects
the including order of the header files?
Or what am I missing?
>
> And rcu_preempt_depth() & rcu_preempt_depth_set() are not universally
> used wrappers, the user can simply also include linux/sched.h to make
> they work.
>
> Thanks
> Lai
>
2024年4月1日 19:40,Alan Huang <[email protected]> wrote:
>
> 2024年4月1日 00:16,Lai Jiangshan <[email protected]> wrote:
>>
>> On Sun, Mar 31, 2024 at 7:10 PM Joel Fernandes <[email protected]> wrote:
>>>
>>>
>>>
>>>> On Mar 28, 2024, at 1:20 PM, Lai Jiangshan <[email protected]> wrote:
>>>>
>>>> From: Lai Jiangshan <[email protected]>
>>>>
>>>> Prepare for arch-specific-defined rcu_preempt_depth_set().
>>>>
>>>> No functionality change intended, but it has to be defined as a macro
>>>> as rcupdate.h is a very low level header included from areas that don't
>>>> even know about the task struct "current".
>>>
>>> Sorry I did not follow changelog. If some rcupdate.h includers do not know
>>> about task_struct, how does adding a macro that uses current help?
>>>
>>
>> Hello
>>
>> This is how macro works and it expands blindly based on tokens on the
>> usage-sites.
>
> But ‘current’ still needs to be expanded at last, it seems to me that it only affects
> the including order of the header files?
>
> Or what am I missing?
Get the missing part: if the user don’t need to use rcu_preempt_depth()
and rcu_preempt_depth_set() but other parts of rcupdate.h, then the two
has to be defined as a macro to avoid including linux/sched.h.
Sorry for the bother.
>
>>
>> And rcu_preempt_depth() & rcu_preempt_depth_set() are not universally
>> used wrappers, the user can simply also include linux/sched.h to make
>> they work.
>>
>> Thanks
>> Lai
Le Thu, Mar 28, 2024 at 03:53:16PM +0800, Lai Jiangshan a ?crit :
> From: Lai Jiangshan <[email protected]>
>
> When the arch code provides HAVE_PCPU_RCU_PREEMPT_COUNT and the
> corresponding functions, rcu_preempt core uses the functions to
> implement rcu_read_[un]lock, rcu_preempt_depth(), special bits,
> switching and so on.
The changelogs don't explain the reason for all of that. I'm guessing
from the cover letter that the purpose is to save some instructions on
calls to rcu_read_unlock() due to inline calls and per CPU optimized
instructions but it's hard to guess from the actual individual patches,
which are meants to be the actual git records.
Thanks.
Le Thu, Mar 28, 2024 at 03:53:17PM +0800, Lai Jiangshan a ?crit :
> From: Lai Jiangshan <[email protected]>
>
> Implement PCPU_RCU_PREEMPT_COUNT for x86.
> Mainly copied from asm/preempt.h
>
> Make rcu_read_[un]lock() inlined for rcu-preempt.
> Make rcu_read_lock() only one instruction.
> Make rcu_read_unlock() only two instructions in the fast path.
>
> Cc: "Paul E. McKenney" <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Signed-off-by: Lai Jiangshan <[email protected]>
> ---
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/current.h | 3 +
> arch/x86/include/asm/rcu_preempt.h | 107 +++++++++++++++++++++++++++++
> arch/x86/kernel/cpu/common.c | 7 +-
> 4 files changed, 115 insertions(+), 3 deletions(-)
> create mode 100644 arch/x86/include/asm/rcu_preempt.h
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 78050d5d7fac..7eb17c12f7b7 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -257,6 +257,7 @@ config X86
> select HAVE_OBJTOOL if X86_64
> select HAVE_OPTPROBES
> select HAVE_PAGE_SIZE_4KB
> + select HAVE_PCPU_RCU_PREEMPT_COUNT
> select HAVE_PCSPKR_PLATFORM
> select HAVE_PERF_EVENTS
> select HAVE_PERF_EVENTS_NMI
> diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
> index bf5953883ec3..dcc2ef784120 100644
> --- a/arch/x86/include/asm/current.h
> +++ b/arch/x86/include/asm/current.h
> @@ -24,6 +24,9 @@ struct pcpu_hot {
> unsigned long top_of_stack;
> void *hardirq_stack_ptr;
> u16 softirq_pending;
> +#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> + int rcu_preempt_count;
> +#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> #ifdef CONFIG_X86_64
> bool hardirq_stack_inuse;
> #else
> diff --git a/arch/x86/include/asm/rcu_preempt.h b/arch/x86/include/asm/rcu_preempt.h
> new file mode 100644
> index 000000000000..cb25ebe038a5
> --- /dev/null
> +++ b/arch/x86/include/asm/rcu_preempt.h
> @@ -0,0 +1,107 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_RCU_PREEMPT_H
> +#define __ASM_RCU_PREEMPT_H
> +
> +#include <asm/rmwcc.h>
> +#include <asm/percpu.h>
> +#include <asm/current.h>
> +
> +#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> +
> +/* We use the MSB mostly because its available */
I think you can safely remove the "We " from all the comments :-)
> +#define RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED 0x80000000
How about RCU_PREEMPT_UNLOCK_FASTPATH ?
> +
> +/*
> + * We use the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit as an inverted
> + * current->rcu_read_unlock_special.s such that a decrement hitting 0
> + * means we can and should call rcu_read_unlock_special().
> + */
> +#define RCU_PREEMPT_INIT (0 + RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED)
Or simply:
#define RCU_PREEMPT_INIT RCU_PREEMPT_UNLOCK_FASTPATH
Or you can even remove RCU_PREEMPT_INIT and use RCU_PREEMPT_UNLOCK_FASTPATH directly.
> +/*
> + * Because we keep RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED set when we do
> + * _not_ need to handle unlock-special for a fast-path decrement.
> + */
> +static __always_inline bool pcpu_rcu_preempt_count_dec_and_test(void)
> +{
> + return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.rcu_preempt_count), e,
> + __percpu_arg([var]));
> +}
> +
> +#define pcpu_rcu_read_unlock_special() \
> +do { \
> + rcu_read_unlock_special(); \
> +} while (0)
Why not just call that directly?
> +
> +#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> +
> +#endif /* __ASM_RCU_PREEMPT_H */
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index ba8cf5e9ce56..0b204a649442 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1992,9 +1992,10 @@ static __init int setup_clearcpuid(char *arg)
> __setup("clearcpuid=", setup_clearcpuid);
>
> DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
> - .current_task = &init_task,
> - .preempt_count = INIT_PREEMPT_COUNT,
> - .top_of_stack = TOP_OF_INIT_STACK,
> + .current_task = &init_task,
> + .preempt_count = INIT_PREEMPT_COUNT,
> + .top_of_stack = TOP_OF_INIT_STACK,
> + .rcu_preempt_count = RCU_PREEMPT_INIT,
#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT ?
Thanks.
> };
> EXPORT_PER_CPU_SYMBOL(pcpu_hot);
> EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
> --
> 2.19.1.6.gb485710b
>
Hello, Frederic
Thanks for reviewing.
On Mon, Apr 22, 2024 at 7:05 PM Frederic Weisbecker <[email protected]> wrote:
> > +
> > +/* We use the MSB mostly because its available */
>
> I think you can safely remove the "We " from all the comments :-)
The file is mainly copied from arch/x86/include/asm/preempt.h.
I will rephrase sentences in later iterations.
>
> > +#define RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED 0x80000000
>
> How about RCU_PREEMPT_UNLOCK_FASTPATH ?
I'm not good at naming. But the MSB really means exactly the opposite
of current->rcu_read_unlock_special and I think "UNLOCK_SPECIAL_INVERTED"
fits the meaning.
>
> > +
> > +/*
> > + * We use the RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED bit as an inverted
> > + * current->rcu_read_unlock_special.s such that a decrement hitting 0
> > + * means we can and should call rcu_read_unlock_special().
> > + */
> > +#define RCU_PREEMPT_INIT (0 + RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED)
>
> Or simply:
>
> #define RCU_PREEMPT_INIT RCU_PREEMPT_UNLOCK_FASTPATH
>
> Or you can even remove RCU_PREEMPT_INIT and use RCU_PREEMPT_UNLOCK_FASTPATH directly.
"0" means the initial rcu_preempt_count is 0 for the initial task.
> > +
> > +#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
> > +
> > +#endif /* __ASM_RCU_PREEMPT_H */
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index ba8cf5e9ce56..0b204a649442 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -1992,9 +1992,10 @@ static __init int setup_clearcpuid(char *arg)
> > __setup("clearcpuid=", setup_clearcpuid);
> >
> > DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
> > - .current_task = &init_task,
> > - .preempt_count = INIT_PREEMPT_COUNT,
> > - .top_of_stack = TOP_OF_INIT_STACK,
> > + .current_task = &init_task,
> > + .preempt_count = INIT_PREEMPT_COUNT,
> > + .top_of_stack = TOP_OF_INIT_STACK,
> > + .rcu_preempt_count = RCU_PREEMPT_INIT,
>
> #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT ?
>
> Thanks.
Fixed in V2:
https://lore.kernel.org/lkml/[email protected]/
Thanks
Lai
>
>
> > };
> > EXPORT_PER_CPU_SYMBOL(pcpu_hot);
> > EXPORT_PER_CPU_SYMBOL(const_pcpu_hot);
> > --
> > 2.19.1.6.gb485710b
> >
Le Tue, Apr 23, 2024 at 05:02:35PM +0800, Lai Jiangshan a écrit :
> Hello, Frederic
>
> Thanks for reviewing.
>
> On Mon, Apr 22, 2024 at 7:05 PM Frederic Weisbecker <[email protected]> wrote:
>
> > > +
> > > +/* We use the MSB mostly because its available */
> >
> > I think you can safely remove the "We " from all the comments :-)
>
> The file is mainly copied from arch/x86/include/asm/preempt.h.
> I will rephrase sentences in later iterations.
>
> >
> > > +#define RCU_PREEMPT_UNLOCK_SPECIAL_INVERTED 0x80000000
> >
> > How about RCU_PREEMPT_UNLOCK_FASTPATH ?
>
>
> I'm not good at naming. But the MSB really means exactly the opposite
> of current->rcu_read_unlock_special and I think "UNLOCK_SPECIAL_INVERTED"
> fits the meaning.
Right but I tend to think a constant should tell what something is, not what
something is not.
FWIW, p->rcu_read_unlock_special could even be renamed to p->rcu_read_unlock_slowpath
Thanks.