2007-12-31 06:09:50

by K.Prasad

[permalink] [raw]
Subject: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

This patch converts the tracing mechanism of Preempt RCU boosting into
markers. The handler functions for these markers are included inside
rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
chosen.

Signed-off-by: K.Prasad <[email protected]>
---
include/linux/rcupreempt_trace.h | 46 ++++++++
kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
3 files changed, 251 insertions(+), 189 deletions(-)

Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
@@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
extern int rcupreempt_mb_flag(int cpu);
extern char *rcupreempt_try_flip_state_name(void);

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+struct preempt_rcu_boost_trace {
+ unsigned long rbs_stat_task_boost_called;
+ unsigned long rbs_stat_task_boosted;
+ unsigned long rbs_stat_boost_called;
+ unsigned long rbs_stat_try_boost;
+ unsigned long rbs_stat_boosted;
+ unsigned long rbs_stat_unboost_called;
+ unsigned long rbs_stat_unboosted;
+ unsigned long rbs_stat_try_boost_readers;
+ unsigned long rbs_stat_boost_readers;
+ unsigned long rbs_stat_try_unboost_readers;
+ unsigned long rbs_stat_unboost_readers;
+ unsigned long rbs_stat_over_taken;
+};
+
+#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
+void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
+ void *private_data, const char *format, ...) \
+{ \
+ va_list ap; \
+ int cpu; \
+ struct preempt_rcu_boost_trace *boost_trace; \
+ va_start(ap, format); \
+ cpu = va_arg(ap, typeof(unsigned int)); \
+ boost_trace = (&per_cpu(boost_trace_data, cpu)); \
+ boost_trace->rbs_stat_##preempt_rcu_boost_var++; \
+ va_end(ap); \
+}
+
+struct preempt_rcu_boost_probe {
+ const char *name;
+ const char *format;
+ marker_probe_func *probe_func;
+};
+
+#define INIT_PREEMPT_RCU_BOOST_PROBE(preempt_rcu_boost_probe_worker) \
+{ \
+ .name = __stringify(preempt_rcu_boost_probe_worker), \
+ .format = "%u", \
+ .probe_func = preempt_rcu_boost_probe_worker##_callback \
+}
+
+extern int read_rcu_boost_prio(void);
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPREEMPT_TRACE_H */
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt_trace.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt_trace.c
@@ -51,6 +51,163 @@ static char *rcupreempt_trace_buf;

static DEFINE_PER_CPU(struct rcupreempt_trace, trace_data);

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
+static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
+static DEFINE_PER_CPU(struct preempt_rcu_boost_trace, boost_trace_data);
+
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(over_taken);
+
+static struct preempt_rcu_boost_probe preempt_rcu_boost_probe_array[] =
+{
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(over_taken)
+};
+
+static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ static DEFINE_MUTEX(mutex);
+ int cnt = 0;
+ int cpu;
+ struct preempt_rcu_boost_trace *prbt;
+ ssize_t bcount;
+ unsigned long task_boost_called = 0;
+ unsigned long task_boosted = 0;
+ unsigned long boost_called = 0;
+ unsigned long try_boost = 0;
+ unsigned long boosted = 0;
+ unsigned long unboost_called = 0;
+ unsigned long unboosted = 0;
+ unsigned long try_boost_readers = 0;
+ unsigned long boost_readers = 0;
+ unsigned long try_unboost_readers = 0;
+ unsigned long unboost_readers = 0;
+ unsigned long over_taken = 0;
+
+ mutex_lock(&mutex);
+
+ for_each_online_cpu(cpu) {
+ prbt = &per_cpu(boost_trace_data, cpu);
+
+ task_boost_called += prbt->rbs_stat_task_boost_called;
+ task_boosted += prbt->rbs_stat_task_boosted;
+ boost_called += prbt->rbs_stat_boost_called;
+ try_boost += prbt->rbs_stat_try_boost;
+ boosted += prbt->rbs_stat_boosted;
+ unboost_called += prbt->rbs_stat_unboost_called;
+ unboosted += prbt->rbs_stat_unboosted;
+ try_boost_readers += prbt->rbs_stat_try_boost_readers;
+ boost_readers += prbt->rbs_stat_boost_readers;
+ try_unboost_readers += prbt->rbs_stat_try_boost_readers;
+ unboost_readers += prbt->rbs_stat_boost_readers;
+ over_taken += prbt->rbs_stat_over_taken;
+ }
+
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boost_called = %ld\n",
+ task_boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boosted = %ld\n",
+ task_boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_called = %ld\n",
+ boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost = %ld\n",
+ try_boost);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boosted = %ld\n",
+ boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_called = %ld\n",
+ unboost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboosted = %ld\n",
+ unboosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost_readers = %ld\n",
+ try_boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_readers = %ld\n",
+ boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_unboost_readers = %ld\n",
+ try_unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_readers = %ld\n",
+ unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "over_taken = %ld\n",
+ over_taken);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "rcu_boost_prio = %d\n",
+ read_rcu_boost_prio());
+ bcount = simple_read_from_buffer(buffer, count, ppos,
+ rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
+ mutex_unlock(&mutex);
+
+ return bcount;
+}
+
+static struct file_operations rcuboost_fops = {
+ .read = rcuboost_read,
+};
+
+static struct dentry *rcuboostdir;
+int rcu_trace_boost_create(struct dentry *rcudir)
+{
+ rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
+ NULL, &rcuboost_fops);
+ if (!rcuboostdir)
+ return 0;
+
+ return 1;
+}
+
+void rcu_trace_boost_destroy(void)
+{
+ if (rcuboostdir)
+ debugfs_remove(rcuboostdir);
+ rcuboostdir = NULL;
+}
+
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
{
return &per_cpu(trace_data, cpu);
@@ -350,6 +507,10 @@ static int rcupreempt_debugfs_init(void)
if (!ctrsdir)
goto free_out;

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ if (!rcu_trace_boost_create(rcudir))
+ goto free_out;
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
return 0;
free_out:
if (ctrsdir)
@@ -382,6 +543,22 @@ static int __init rcupreempt_trace_init(
}
printk(KERN_INFO "RCU Preempt markers registered\n");

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++) {
+ struct preempt_rcu_boost_probe *p = \
+ &preempt_rcu_boost_probe_array[i];
+ ret = marker_probe_register(p->name, p->format,
+ p->probe_func, p);
+ if (ret)
+ printk(KERN_INFO "Unable to register Preempt RCU Boost \
+ probe %s\n", preempt_rcu_boost_probe_array[i].name);
+ ret = marker_arm(p->name);
+ if (ret)
+ printk(KERN_INFO "Unable to arm Preempt RCU Boost \
+ markers %s\n", p->name);
+}
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
mutex_init(&rcupreempt_trace_mutex);
rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
if (!rcupreempt_trace_buf)
@@ -400,6 +577,12 @@ static void __exit rcupreempt_trace_clea
marker_probe_unregister(rcupreempt_probe_array[i].name);
printk(KERN_INFO "RCU Preempt markers unregistered\n");

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ rcu_trace_boost_destroy();
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++)
+ marker_probe_unregister(preempt_rcu_boost_probe_array[i].name);
+ printk(KERN_INFO "Preempt RCU Boost markers unregistered\n");
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
debugfs_remove(statdir);
debugfs_remove(gpdir);
debugfs_remove(ctrsdir);
Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
===================================================================
--- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/kernel/rcupreempt-boost.c
+++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/kernel/rcupreempt-boost.c
@@ -40,186 +40,9 @@ struct rcu_boost_dat {
int rbs_prio; /* CPU copy of rcu_boost_prio */
struct list_head rbs_toboost; /* Preempted RCU readers */
struct list_head rbs_boosted; /* RCU readers that have been boosted */
-#ifdef CONFIG_RCU_TRACE
- /* The rest are for statistics */
- unsigned long rbs_stat_task_boost_called;
- unsigned long rbs_stat_task_boosted;
- unsigned long rbs_stat_boost_called;
- unsigned long rbs_stat_try_boost;
- unsigned long rbs_stat_boosted;
- unsigned long rbs_stat_unboost_called;
- unsigned long rbs_stat_unboosted;
- unsigned long rbs_stat_try_boost_readers;
- unsigned long rbs_stat_boost_readers;
- unsigned long rbs_stat_try_unboost_readers;
- unsigned long rbs_stat_unboost_readers;
- unsigned long rbs_stat_over_taken;
-#endif /* CONFIG_RCU_TRACE */
};

static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_data);
-#define RCU_BOOST_ME &__get_cpu_var(rcu_boost_data)
-
-#ifdef CONFIG_RCU_TRACE
-
-#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
-static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
-
-static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- static DEFINE_MUTEX(mutex);
- int cnt = 0;
- int cpu;
- struct rcu_boost_dat *rbd;
- ssize_t bcount;
- unsigned long task_boost_called = 0;
- unsigned long task_boosted = 0;
- unsigned long boost_called = 0;
- unsigned long try_boost = 0;
- unsigned long boosted = 0;
- unsigned long unboost_called = 0;
- unsigned long unboosted = 0;
- unsigned long try_boost_readers = 0;
- unsigned long boost_readers = 0;
- unsigned long try_unboost_readers = 0;
- unsigned long unboost_readers = 0;
- unsigned long over_taken = 0;
-
- mutex_lock(&mutex);
-
- for_each_online_cpu(cpu) {
- rbd = &per_cpu(rcu_boost_data, cpu);
-
- task_boost_called += rbd->rbs_stat_task_boost_called;
- task_boosted += rbd->rbs_stat_task_boosted;
- boost_called += rbd->rbs_stat_boost_called;
- try_boost += rbd->rbs_stat_try_boost;
- boosted += rbd->rbs_stat_boosted;
- unboost_called += rbd->rbs_stat_unboost_called;
- unboosted += rbd->rbs_stat_unboosted;
- try_boost_readers += rbd->rbs_stat_try_boost_readers;
- boost_readers += rbd->rbs_stat_boost_readers;
- try_unboost_readers += rbd->rbs_stat_try_boost_readers;
- unboost_readers += rbd->rbs_stat_boost_readers;
- over_taken += rbd->rbs_stat_over_taken;
- }
-
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boost_called = %ld\n",
- task_boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boosted = %ld\n",
- task_boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_called = %ld\n",
- boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost = %ld\n",
- try_boost);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boosted = %ld\n",
- boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_called = %ld\n",
- unboost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboosted = %ld\n",
- unboosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost_readers = %ld\n",
- try_boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_readers = %ld\n",
- boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_unboost_readers = %ld\n",
- try_unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_readers = %ld\n",
- unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "over_taken = %ld\n",
- over_taken);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "rcu_boost_prio = %d\n",
- rcu_boost_prio);
- bcount = simple_read_from_buffer(buffer, count, ppos,
- rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
- mutex_unlock(&mutex);
-
- return bcount;
-}
-
-static struct file_operations rcuboost_fops = {
- .read = rcuboost_read,
-};
-
-static struct dentry *rcuboostdir;
-int rcu_trace_boost_create(struct dentry *rcudir)
-{
- rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
- NULL, &rcuboost_fops);
- if (!rcuboostdir)
- return 0;
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_create);
-
-void rcu_trace_boost_destroy(void)
-{
- if (rcuboostdir)
- debugfs_remove(rcuboostdir);
- rcuboostdir = NULL;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_destroy);
-
-#define RCU_BOOST_TRACE_FUNC_DECL(type) \
- static void rcu_trace_boost_##type(struct rcu_boost_dat *rbd) \
- { \
- rbd->rbs_stat_##type++; \
- }
-RCU_BOOST_TRACE_FUNC_DECL(task_boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(task_boosted)
-RCU_BOOST_TRACE_FUNC_DECL(boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost)
-RCU_BOOST_TRACE_FUNC_DECL(boosted)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_called)
-RCU_BOOST_TRACE_FUNC_DECL(unboosted)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(try_unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(over_taken)
-#else /* CONFIG_RCU_TRACE */
-/* These were created by the above macro "RCU_BOOST_TRACE_FUNC_DECL" */
-# define rcu_trace_boost_task_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_task_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost(rbd) do { } while (0)
-# define rcu_trace_boost_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_called(rbd) do { } while (0)
-# define rcu_trace_boost_unboosted(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_try_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_over_taken(rbd) do { } while (0)
-#endif /* CONFIG_RCU_TRACE */

static inline int rcu_is_boosted(struct task_struct *task)
{
@@ -234,10 +57,10 @@ static void rcu_boost_task(struct task_s
WARN_ON(!irqs_disabled());
WARN_ON_SMP(!spin_is_locked(&task->pi_lock));

- rcu_trace_boost_task_boost_called(RCU_BOOST_ME);
+ trace_mark(task_boost_called, "%u", smp_processor_id());

if (task->rcu_prio < task->prio) {
- rcu_trace_boost_task_boosted(RCU_BOOST_ME);
+ trace_mark(task_boosted, "%u", smp_processor_id());
task_setprio(task, task->rcu_prio);
}
}
@@ -261,7 +84,7 @@ void __rcu_preempt_boost(void)

WARN_ON(!current->rcu_read_lock_nesting);

- rcu_trace_boost_boost_called(RCU_BOOST_ME);
+ trace_mark(boost_called, "%u", smp_processor_id());

/* check to see if we are already boosted */
if (unlikely(rcu_is_boosted(curr)))
@@ -279,7 +102,7 @@ void __rcu_preempt_boost(void)

curr->rcub_rbdp = rbd;

- rcu_trace_boost_try_boost(rbd);
+ trace_mark(try_boost, "%u", smp_processor_id());

prio = rt_mutex_getprio(curr);

@@ -288,7 +111,7 @@ void __rcu_preempt_boost(void)
if (prio <= rbd->rbs_prio)
goto out;

- rcu_trace_boost_boosted(curr->rcub_rbdp);
+ trace_mark(boosted, "%u", smp_processor_id());

curr->rcu_prio = rbd->rbs_prio;
rcu_boost_task(curr);
@@ -313,7 +136,7 @@ void __rcu_preempt_unboost(void)
int prio;
unsigned long flags;

- rcu_trace_boost_unboost_called(RCU_BOOST_ME);
+ trace_mark(unboost_called, "%u", smp_processor_id());

/* if not boosted, then ignore */
if (likely(!rcu_is_boosted(curr)))
@@ -351,7 +174,7 @@ void __rcu_preempt_unboost(void)

list_del_init(&curr->rcub_entry);

- rcu_trace_boost_unboosted(rbd);
+ trace_mark(unboosted, "%u", smp_processor_id());

curr->rcu_prio = MAX_PRIO;

@@ -412,7 +235,7 @@ static int __rcu_boost_readers(struct rc
* Another task may have taken over.
*/
if (curr->rcu_preempt_counter != rcu_boost_counter) {
- rcu_trace_boost_over_taken(rbd);
+ trace_mark(over_taken, "%u", smp_processor_id());
return 1;
}

@@ -443,7 +266,7 @@ void rcu_boost_readers(void)

prio = rt_mutex_getprio(curr);

- rcu_trace_boost_try_boost_readers(RCU_BOOST_ME);
+ trace_mark(try_boost_readers, "%u", smp_processor_id());

if (prio >= rcu_boost_prio) {
/* already boosted */
@@ -453,7 +276,7 @@ void rcu_boost_readers(void)

rcu_boost_prio = prio;

- rcu_trace_boost_boost_readers(RCU_BOOST_ME);
+ trace_mark(boost_readers, "%u", smp_processor_id());

/* Flag that we are the one to unboost */
curr->rcu_preempt_counter = ++rcu_boost_counter;
@@ -486,12 +309,12 @@ void rcu_unboost_readers(void)

spin_lock_irqsave(&rcu_boost_wake_lock, flags);

- rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
+ trace_mark(try_unboost_readers, "%u", smp_processor_id());

if (current->rcu_preempt_counter != rcu_boost_counter)
goto out;

- rcu_trace_boost_unboost_readers(RCU_BOOST_ME);
+ trace_mark(unboost_readers, "%u", smp_processor_id());

/*
* We could also put in something that
@@ -514,6 +337,16 @@ void rcu_unboost_readers(void)
}

/*
+ * This function exports the rcu_boost_prio variable for use by
+ * modules that need it e.g. RCU_TRACE module
+ */
+int read_rcu_boost_prio(void)
+{
+ return rcu_boost_prio;
+}
+EXPORT_SYMBOL_GPL(read_rcu_boost_prio);
+
+/*
* The krcupreemptd wakes up every "rcu_preempt_thread_secs"
* seconds at the minimum priority of 1 to do a
* synchronize_rcu. This ensures that grace periods finish


2007-12-31 10:21:40

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing


* K. Prasad <[email protected]> wrote:

> @@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
>
> spin_lock_irqsave(&rcu_boost_wake_lock, flags);
>
> - rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
> + trace_mark(try_unboost_readers, "%u", smp_processor_id());

this isnt directed at you or your patch, it's more directed at Mathieu,
but looking at this actual markers patch submitted to me, i'm still
fundamentally worried about the whole marker approach.

Firstly, why on earth does a full format string have to be passed in for
something as simple as a CPU id? This way we basically codify it forever
that tracing _has_ to be expensive when enabled. The latency-tracer
(which i'd love to convert to markers, if only markers were capable
enough) has shown it that tracing _can_ be used to capture performance
data without disturbing the measured system, even at hundreds of
thousands context switches a second per CPU.

Secondly, the inlined overhead of trace_mark() is still WAY too large:

if (unlikely(__mark_##name.state)) { \
preempt_disable(); \
(*__mark_##name.call) \
(&__mark_##name, call_data, \
format, ## args); \
preempt_enable(); \
} \

Whatever became of the obvious suggestion that i outlined years ago, to
have a _single_ trace call instruction and to _patch out_ the damn
marker calls by default? No .state flag checking inlined. No
preempt_disable()/enable() pair. Patching static tracepoints OUT of the
kernel, only leaving a ~5-byte NOP sequence behind them (and some
minimal disturbance to the variables the tracepoint accesses). We've got
all the alternatives.h code patching infrastructure available for such
purposes. Why are we 2 years down the line and _STILL_ arguing about
this?

Thirdly, the patch selects CONFIG_MARKERS:

> config RCU_TRACE
> - bool "Enable tracing for RCU - currently stats in debugfs"
> + tristate "Enable tracing for RCU - currently stats in debugfs"
> select DEBUG_FS
> - default y
> + select MARKERS

Which adds overhead (inlined checks for markers) all around the kernel,
even if all markers are deactivated! Imagine a thousand of them and the
kernel blows up measurably.

Sadly, this whole trace_mark() API seems to have gotten much worse since
i last saw it. It's sub-par when it's turned on and it's sub-par when
it's turned off. It gets us the worst of both worlds.

If it continues like this then i'd much rather see people add printks as
tracing, because there you _know_ that it's high-overhead and people
wont start arguing about trace compatibility either, etc.

Ingo

2008-01-02 04:29:18

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

Ingo Molnar <[email protected]> writes:

> [...] Firstly, why on earth does a full format string have to be
> passed in for something as simple as a CPU id? This way we basically
> codify it forever that tracing _has_ to be expensive when
> enabled. [...]

FWIW, I'm not keen about the format strings either, but they don't
constitute a performance hit beyond an additional parameter. It does
not need to actually get parsed at run time.


>[...]
> Secondly, the inlined overhead of trace_mark() is still WAY too large:
>
> if (unlikely(__mark_##name.state)) { \
> [...]
> } \

Note that this is for the unoptimized case. The immediate-value code
is better. I have still yet to see some good measurements of how much
the overheads of the various variants are, however. It's only fair to
gather these numbers and continue the debate with them in hand.


> Whatever became of the obvious suggestion that i outlined years ago,
> to have a _single_ trace call instruction and to _patch out_ the
> damn marker calls by default? [...] only leaving a ~5-byte NOP
> sequence behind them (and some minimal disturbance to the variables
> the tracepoint accesses). [...]

This has been answered several times before. It's because the marker
parameters have to be (conditionally) evaluated and pushed onto a call
frame. It's not just a call that would need being nop'd, but a whole
function call setup/teardown sequence, which itself can be interleaved
with adjacent code.


- FChE

2008-01-02 12:49:08

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing


* Frank Ch. Eigler <[email protected]> wrote:

> Ingo Molnar <[email protected]> writes:
>
> > [...] Firstly, why on earth does a full format string have to be
> > passed in for something as simple as a CPU id? This way we basically
> > codify it forever that tracing _has_ to be expensive when
> > enabled. [...]
>
> FWIW, I'm not keen about the format strings either, but they don't
> constitute a performance hit beyond an additional parameter. It does
> not need to actually get parsed at run time.

"only" an additional parameter. The whole _point_ behind these markers
is for them to have minimal effect!

> >[...]
> > Secondly, the inlined overhead of trace_mark() is still WAY too large:
> >
> > if (unlikely(__mark_##name.state)) { \
> > [...]
> > } \
>
> Note that this is for the unoptimized case. The immediate-value code
> is better. I have still yet to see some good measurements of how much
> the overheads of the various variants are, however. It's only fair to
> gather these numbers and continue the debate with them in hand.

this is a general policy matter. It is _so much easier_ to add markers
if they _can_ have near-zero overhead (as in 1-2 instructions).
Otherwise we'll keep arguing about it, especially if any is added to
performance-critical codepath. (where we are counting instructions)

> > Whatever became of the obvious suggestion that i outlined years ago,
> > to have a _single_ trace call instruction and to _patch out_ the
> > damn marker calls by default? [...] only leaving a ~5-byte NOP
> > sequence behind them (and some minimal disturbance to the variables
> > the tracepoint accesses). [...]
>
> This has been answered several times before. It's because the marker
> parameters have to be (conditionally) evaluated and pushed onto a call
> frame. It's not just a call that would need being nop'd, but a whole
> function call setup/teardown sequence, which itself can be interleaved
> with adjacent code.

you are still missing my point. Firstly, the kernel is regparm built so
there's no call frame to be pushed to in most cases - we pass most
parameters in registers. (hence my stressing of minimizing the number of
parameters to an absolute minimum)

Secondly, the side-effects of a function call is what i referred to via:

> [...] (and some minimal disturbance to the variables the tracepoint
> accesses). [...]

really, a tracepoint obviously accesses data that is readily accessible
in that spot. Worst-case we'll have some additional register constraints
that make the code a bit less optimal.

Yes, if a trace point references data that is _not_ readily available,
then of course the preparation for the function call might not be cheap.
But that can be optimized by placing the tracepoints intelligently.

what cannot be optimized away at all are the conditional instructions
introduced by the probe points, extra parameters and the space overhead
of the function call itself.

Ingo

2008-01-02 16:35:39

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

Hi -

On Wed, Jan 02, 2008 at 01:47:34PM +0100, Ingo Molnar wrote:
> [...]
> > FWIW, I'm not keen about the format strings either, but they don't
> > constitute a performance hit beyond an additional parameter. It does
> > not need to actually get parsed at run time.
>
> "only" an additional parameter. The whole _point_ behind these markers
> is for them to have minimal effect!

Agreed. The only alternative I recall seeing proposed was my own
cartesian-product macro suite that encodes parameter types into the
marker function/macro name itself. (Maybe some of that could be
hidden with gcc typeof() magic.) There appeared to be a consensus
that this was more undesirable. Do you agree?


> [...]
> this is a general policy matter. It is _so much easier_ to add markers
> if they _can_ have near-zero overhead (as in 1-2 instructions).
> Otherwise we'll keep arguing about it, especially if any is added to
> performance-critical codepath. (where we are counting instructions)

The effect of the immediate-values patch, combined with gcc
CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
dcache-impact-free instructions. The register saves, parameter
evaluation, the function call, can all be moved out of line.


> > > Whatever became of the obvious suggestion that i outlined years ago,
> > > to have a _single_ trace call instruction and to _patch out_ the
> > > damn marker calls by default? [...] only leaving a ~5-byte NOP
> > > sequence behind them (and some minimal disturbance to the variables
> > > the tracepoint accesses). [...]
> >
> > This has been answered several times before. It's because the marker
> > parameters have to be (conditionally) evaluated and pushed onto a call
> > frame. It's not just a call that would need being nop'd, but a whole
> > function call setup/teardown sequence, which itself can be interleaved
> > with adjacent code.
>
> you are still missing my point. Firstly, the kernel is regparm built so
> there's no call frame to be pushed to in most cases - we pass most
> parameters in registers. [...]

Yes, but those registers will already be used, so their former values
need to be saved or later recomputed as a part of the call sequence.
Even wrapping it all with an asm barrier may not be enough to
completely bound the code with an address range that can be naively
NOP'd out.


> Secondly, the side-effects of a function call is what i referred to via:
>
> > [...] (and some minimal disturbance to the variables the tracepoint
> > accesses). [...]
>
> really, a tracepoint obviously accesses data that is readily accessible
> in that spot. Worst-case we'll have some additional register constraints
> that make the code a bit less optimal. [...]

It's more than that - people will want to pass some dereferenced
fields from a struct*; some old function parameters that were by the
time of the marker call shoved out of registers again. Register
constraints that prohibit general expressions would impede the
utility of the tool.


> what cannot be optimized away at all are the conditional
> instructions introduced by the probe points, extra parameters and
> the space overhead of the function call itself.

Almost all of that can & should be moved out of line.


- FChE

2008-01-02 17:02:41

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing


* Frank Ch. Eigler <[email protected]> wrote:

> > [...] this is a general policy matter. It is _so much easier_ to add
> > markers if they _can_ have near-zero overhead (as in 1-2
> > instructions). Otherwise we'll keep arguing about it, especially if
> > any is added to performance-critical codepath. (where we are
> > counting instructions)
>
> The effect of the immediate-values patch, combined with gcc
> CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> dcache-impact-free instructions. The register saves, parameter
> evaluation, the function call, can all be moved out of line.

well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
we should already be getting that, right?

There's one thing that would make out-of-line tracepoints have a lot
less objectionable to me: right now the 'out of line' area is put to the
end of functions. That splinters the kernel image with inactive, rarely
taken areas of code - blowing up its icache footprint considerably. For
example sched.o has ~100 functions, with the average function size being
200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
already.

It's true that keeping the off-site code within the function keeps total
codesize slightly smaller, because the offsets (and hence the
conditional jumps) are thus 8 bit - but that's below 1% and the
cache-blow-up aspect is more severe in practice at 10-20%.

So it would be nice if we could collect all this offline code and stuff
it away into another portion of the kernel image. (or, into another
portion of the object file - which would still be good enough in
practice)

Ingo

2008-01-02 17:58:41

by Frank Ch. Eigler

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

Hi -

On Wed, Jan 02, 2008 at 06:01:57PM +0100, Ingo Molnar wrote:
> [...]
> well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> we should already be getting that, right?

Right.

> [...] So it would be nice if we could collect all this offline code
> and stuff it away into another portion of the kernel image. (or,
> into another portion of the object file - which would still be good
> enough in practice)

That would be the -freorder-blocks-and-partition flag, as proposed by
Arjan two Februarys ago. I don't see any traces of Andi's overriding
"-fno-reorder-blocks" in the current linus tree, so maybe it's time to
resurrect this one:

http://readlist.com/lists/vger.kernel.org/linux-kernel/39/196123.html

- FChE

2008-01-02 20:12:00

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing


* Frank Ch. Eigler <[email protected]> wrote:

> Hi -
>
> On Wed, Jan 02, 2008 at 06:01:57PM +0100, Ingo Molnar wrote:
> > [...]
> > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> > we should already be getting that, right?
>
> Right.
>
> > [...] So it would be nice if we could collect all this offline code
> > and stuff it away into another portion of the kernel image. (or,
> > into another portion of the object file - which would still be good
> > enough in practice)
>
> That would be the -freorder-blocks-and-partition flag, as proposed by
> Arjan two Februarys ago. I don't see any traces of Andi's overriding
> "-fno-reorder-blocks" in the current linus tree, so maybe it's time to
> resurrect this one:
>
> http://readlist.com/lists/vger.kernel.org/linux-kernel/39/196123.html

hm, that gives:

Forbidden

You don't have permission to access
/lists/vger.kernel.org/linux-kernel/39/196123.html on this server.

but yeah, i had the impression that gcc couldnt yet do this. Not a
showstopper, but it would be nice to initiate the gcc side of things ...

Ingo

2008-01-02 23:49:19

by Nicholas Miell

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing


On Wed, 2008-01-02 at 11:33 -0500, Frank Ch. Eigler wrote:
> Hi -
>
> On Wed, Jan 02, 2008 at 01:47:34PM +0100, Ingo Molnar wrote:
> > [...]
> > > FWIW, I'm not keen about the format strings either, but they don't
> > > constitute a performance hit beyond an additional parameter. It does
> > > not need to actually get parsed at run time.
> >
> > "only" an additional parameter. The whole _point_ behind these markers
> > is for them to have minimal effect!
>
> Agreed. The only alternative I recall seeing proposed was my own
> cartesian-product macro suite that encodes parameter types into the
> marker function/macro name itself. (Maybe some of that could be
> hidden with gcc typeof() magic.) There appeared to be a consensus
> that this was more undesirable. Do you agree?
>
>

C++ name mangling would be extremely useful here.


Actually, why isn't the DWARF information for the functions sufficient?

--
Nicholas Miell <[email protected]>

2008-01-03 16:36:18

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

* K. Prasad ([email protected]) wrote:
> This patch converts the tracing mechanism of Preempt RCU boosting into
> markers. The handler functions for these markers are included inside
> rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> chosen.
>
> Signed-off-by: K.Prasad <[email protected]>
> ---
> include/linux/rcupreempt_trace.h | 46 ++++++++
> kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> 3 files changed, 251 insertions(+), 189 deletions(-)
>
> Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> ===================================================================
> --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> extern int rcupreempt_mb_flag(int cpu);
> extern char *rcupreempt_try_flip_state_name(void);
>
> +#ifdef CONFIG_PREEMPT_RCU_BOOST
> +struct preempt_rcu_boost_trace {
> + unsigned long rbs_stat_task_boost_called;
> + unsigned long rbs_stat_task_boosted;
> + unsigned long rbs_stat_boost_called;
> + unsigned long rbs_stat_try_boost;
> + unsigned long rbs_stat_boosted;
> + unsigned long rbs_stat_unboost_called;
> + unsigned long rbs_stat_unboosted;
> + unsigned long rbs_stat_try_boost_readers;
> + unsigned long rbs_stat_boost_readers;
> + unsigned long rbs_stat_try_unboost_readers;
> + unsigned long rbs_stat_unboost_readers;
> + unsigned long rbs_stat_over_taken;
> +};
> +
> +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> + void *private_data, const char *format, ...) \
> +{ \
> + va_list ap; \
> + int cpu; \
> + struct preempt_rcu_boost_trace *boost_trace; \
> + va_start(ap, format); \
> + cpu = va_arg(ap, typeof(unsigned int)); \

(sorry for late response, I just came back from vacation)

Instead of passing the cpu id as a marker parameter, why don't you
simply use smp_processor_id() right here ?

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-01-03 19:24:37

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

* Ingo Molnar ([email protected]) wrote:
>
> * K. Prasad <[email protected]> wrote:
>
> > @@ -486,12 +309,12 @@ void rcu_unboost_readers(void)
> >
> > spin_lock_irqsave(&rcu_boost_wake_lock, flags);
> >
> > - rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
> > + trace_mark(try_unboost_readers, "%u", smp_processor_id());
>
> this isnt directed at you or your patch, it's more directed at Mathieu,
> but looking at this actual markers patch submitted to me, i'm still
> fundamentally worried about the whole marker approach.
>

Hi Ingo,

Just to note that some of your concerns have been answered by Frank and
that I fully agree with what he said. Sorry for the late reply
(vacation..)

> Firstly, why on earth does a full format string have to be passed in for
> something as simple as a CPU id? This way we basically codify it forever
> that tracing _has_ to be expensive when enabled. The latency-tracer
> (which i'd love to convert to markers, if only markers were capable
> enough) has shown it that tracing _can_ be used to capture performance
> data without disturbing the measured system, even at hundreds of
> thousands context switches a second per CPU.
>

As I replyed in my other email, the cpu id does not have to be passed as
an argument. Also, when no argument is passed, the format string does
not have to be parsed at all in the callback.

> Secondly, the inlined overhead of trace_mark() is still WAY too large:
>
> if (unlikely(__mark_##name.state)) { \
> preempt_disable(); \
> (*__mark_##name.call) \
> (&__mark_##name, call_data, \
> format, ## args); \
> preempt_enable(); \
> } \
>

To get the full version of my trace_mark, you would have to take a few
bits from the -mm tree, which includes the "multiple probes support"
(this one moves the preempt disable/enable completely out of line) and
you should also get the "markers use immediate values" patch which I
submitted a few weeks ago : it uses code patching to modify a byte in a
mov instruction that is used to jump over the entire function call
(memory references, stack setup and the call itself)

> Whatever became of the obvious suggestion that i outlined years ago, to
> have a _single_ trace call instruction and to _patch out_ the damn
> marker calls by default? No .state flag checking inlined. No
> preempt_disable()/enable() pair. Patching static tracepoints OUT of the
> kernel, only leaving a ~5-byte NOP sequence behind them (and some
> minimal disturbance to the variables the tracepoint accesses). We've got
> all the alternatives.h code patching infrastructure available for such
> purposes. Why are we 2 years down the line and _STILL_ arguing about
> this?
>

Frank replied appropriately to this. It's mostly because of stack setup
and/or move to registers to prepare the call.

> Thirdly, the patch selects CONFIG_MARKERS:
>
> > config RCU_TRACE
> > - bool "Enable tracing for RCU - currently stats in debugfs"
> > + tristate "Enable tracing for RCU - currently stats in debugfs"
> > select DEBUG_FS
> > - default y
> > + select MARKERS
>
> Which adds overhead (inlined checks for markers) all around the kernel,
> even if all markers are deactivated! Imagine a thousand of them and the
> kernel blows up measurably.
>
> Sadly, this whole trace_mark() API seems to have gotten much worse since
> i last saw it. It's sub-par when it's turned on and it's sub-par when
> it's turned off. It gets us the worst of both worlds.
>
> If it continues like this then i'd much rather see people add printks as
> tracing, because there you _know_ that it's high-overhead and people
> wont start arguing about trace compatibility either, etc.
>

I think Frank's response explains things in enough depth.

> Ingo

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> * K. Prasad ([email protected]) wrote:
> > This patch converts the tracing mechanism of Preempt RCU boosting into
> > markers. The handler functions for these markers are included inside
> > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > chosen.
> >
> > Signed-off-by: K.Prasad <[email protected]>
> > ---
> > include/linux/rcupreempt_trace.h | 46 ++++++++
> > kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> > kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> > 3 files changed, 251 insertions(+), 189 deletions(-)
> >
> > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > ===================================================================
> > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > extern int rcupreempt_mb_flag(int cpu);
> > extern char *rcupreempt_try_flip_state_name(void);
> >
> > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > +struct preempt_rcu_boost_trace {
> > + unsigned long rbs_stat_task_boost_called;
> > + unsigned long rbs_stat_task_boosted;
> > + unsigned long rbs_stat_boost_called;
> > + unsigned long rbs_stat_try_boost;
> > + unsigned long rbs_stat_boosted;
> > + unsigned long rbs_stat_unboost_called;
> > + unsigned long rbs_stat_unboosted;
> > + unsigned long rbs_stat_try_boost_readers;
> > + unsigned long rbs_stat_boost_readers;
> > + unsigned long rbs_stat_try_unboost_readers;
> > + unsigned long rbs_stat_unboost_readers;
> > + unsigned long rbs_stat_over_taken;
> > +};
> > +
> > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > + void *private_data, const char *format, ...) \
> > +{ \
> > + va_list ap; \
> > + int cpu; \
> > + struct preempt_rcu_boost_trace *boost_trace; \
> > + va_start(ap, format); \
> > + cpu = va_arg(ap, typeof(unsigned int)); \
>
> (sorry for late response, I just came back from vacation)
>
> Instead of passing the cpu id as a marker parameter, why don't you
> simply use smp_processor_id() right here ?
>

Agreed.

Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(),
we need not pass the parameter and simply use smp_processor_id().

It's only the RCU_TRACE_RDP() that requires cpuid to be passed.


Thanks and Regards
gautham.


> --
> Mathieu Desnoyers
> Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

--
Gautham R Shenoy
Linux Technology Center
IBM India.
"Freedom comes with a price tag of responsibility, which is still a bargain,
because Freedom is priceless!"

Subject: __get_cpu_var() called from a preempt-unsafe context in __rcu_preempt_unboost() ?

Hi Paul, Steve,

This is with reference to the preemptible rcu implementation in
2.6.24-rc5-rt1.

In __rcu_read_unlock(), we call __rcu_preempt_unboost() to unboost a
task's priority which had been bumped up when it was preempted out
while the rcu_read section.

The code path is as follows

__rcu_read_unlock()
|
|--> local_irq_restore(oldirq);
|
|--> __rcu_preempt_unboost();
|
|--> rcu_trace_boost_unboost_called(RCU_BOOST_ME)


where RCU_BOOST_ME is #defined as &__get_cpu_var(rcu_boost_data).

Is calling __get_cpu_var() safe in this context, since we've already
enabled the local interrupts and we're not in a preempt_disabled() ?

Thanks and Regards
gautham.

--
Gautham R Shenoy
Linux Technology Center
IBM India.
"Freedom comes with a price tag of responsibility, which is still a bargain,
because Freedom is priceless!"

2008-01-04 13:48:57

by Steven Rostedt

[permalink] [raw]
Subject: Re: __get_cpu_var() called from a preempt-unsafe context in __rcu_preempt_unboost() ?


On Fri, 4 Jan 2008, Gautham R Shenoy wrote:
> |
> |--> rcu_trace_boost_unboost_called(RCU_BOOST_ME)
>
>
> where RCU_BOOST_ME is #defined as &__get_cpu_var(rcu_boost_data).
>
> Is calling __get_cpu_var() safe in this context, since we've already
> enabled the local interrupts and we're not in a preempt_disabled() ?

Probably not, but the worst that can happen is that we corrupt the trace
counter, and miss an increment. I'll see if I can fix that, but it should
be quite harmless wrt the stability of the system.

Thanks for pointing that out.

-- Steve

2008-01-05 12:46:47

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

* Gautham R Shenoy ([email protected]) wrote:
> On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> > * K. Prasad ([email protected]) wrote:
> > > This patch converts the tracing mechanism of Preempt RCU boosting into
> > > markers. The handler functions for these markers are included inside
> > > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > > chosen.
> > >
> > > Signed-off-by: K.Prasad <[email protected]>
> > > ---
> > > include/linux/rcupreempt_trace.h | 46 ++++++++
> > > kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> > > kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> > > 3 files changed, 251 insertions(+), 189 deletions(-)
> > >
> > > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > ===================================================================
> > > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > > extern int rcupreempt_mb_flag(int cpu);
> > > extern char *rcupreempt_try_flip_state_name(void);
> > >
> > > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > > +struct preempt_rcu_boost_trace {
> > > + unsigned long rbs_stat_task_boost_called;
> > > + unsigned long rbs_stat_task_boosted;
> > > + unsigned long rbs_stat_boost_called;
> > > + unsigned long rbs_stat_try_boost;
> > > + unsigned long rbs_stat_boosted;
> > > + unsigned long rbs_stat_unboost_called;
> > > + unsigned long rbs_stat_unboosted;
> > > + unsigned long rbs_stat_try_boost_readers;
> > > + unsigned long rbs_stat_boost_readers;
> > > + unsigned long rbs_stat_try_unboost_readers;
> > > + unsigned long rbs_stat_unboost_readers;
> > > + unsigned long rbs_stat_over_taken;
> > > +};
> > > +
> > > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > > + void *private_data, const char *format, ...) \
> > > +{ \
> > > + va_list ap; \
> > > + int cpu; \
> > > + struct preempt_rcu_boost_trace *boost_trace; \
> > > + va_start(ap, format); \
> > > + cpu = va_arg(ap, typeof(unsigned int)); \
> >
> > (sorry for late response, I just came back from vacation)
> >
> > Instead of passing the cpu id as a marker parameter, why don't you
> > simply use smp_processor_id() right here ?
> >
>
> Agreed.
>
> Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(),
> we need not pass the parameter and simply use smp_processor_id().
>
> It's only the RCU_TRACE_RDP() that requires cpuid to be passed.
>


And it would help if you declare the format string with something like :

"cpuid %u" instead of "%u"

This way, if a generic tracer like LTTng dumps the traces and later a
plugin in an analysis tool like LTTV have to hook on a specific field of
your event, it can access it by specifying the marker name and field
name. And even if fields are added to this event, it won't break
compatibility as long as this field is there and have the same name.

Mathieu

>
> Thanks and Regards
> gautham.
>
>
> > --
> > Mathieu Desnoyers
> > Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
>
> --
> Gautham R Shenoy
> Linux Technology Center
> IBM India.
> "Freedom comes with a price tag of responsibility, which is still a bargain,
> because Freedom is priceless!"

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-01-07 19:00:13

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

* Ingo Molnar ([email protected]) wrote:
>
> * Frank Ch. Eigler <[email protected]> wrote:
>
> > > [...] this is a general policy matter. It is _so much easier_ to add
> > > markers if they _can_ have near-zero overhead (as in 1-2
> > > instructions). Otherwise we'll keep arguing about it, especially if
> > > any is added to performance-critical codepath. (where we are
> > > counting instructions)
> >
> > The effect of the immediate-values patch, combined with gcc
> > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> > dcache-impact-free instructions. The register saves, parameter
> > evaluation, the function call, can all be moved out of line.
>
> well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> we should already be getting that, right?
>
> There's one thing that would make out-of-line tracepoints have a lot
> less objectionable to me: right now the 'out of line' area is put to the
> end of functions. That splinters the kernel image with inactive, rarely
> taken areas of code - blowing up its icache footprint considerably. For
> example sched.o has ~100 functions, with the average function size being
> 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
> already.

Hrm, I agree this can be a problem on architectures with more standard
associative icaches, but aren't most x86_64 machines (and modern x86_32)
using an instruction trace cache instead ? This makes the problem
irrelevant.

But I agree that, as Frank proposed, -freorder-blocks-and-partition
could help us in that matter for the architectures using an associative
L1 icache.

Mathieu

>
> It's true that keeping the off-site code within the function keeps total
> codesize slightly smaller, because the offsets (and hence the
> conditional jumps) are thus 8 bit - but that's below 1% and the
> cache-blow-up aspect is more severe in practice at 10-20%.
>
> So it would be nice if we could collect all this offline code and stuff
> it away into another portion of the kernel image. (or, into another
> portion of the object file - which would still be good enough in
> practice)
>
> Ingo

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-01-07 19:44:19

by K.Prasad

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

On Sat, Jan 05, 2008 at 07:46:32AM -0500, Mathieu Desnoyers wrote:
> * Gautham R Shenoy ([email protected]) wrote:
> > On Thu, Jan 03, 2008 at 11:30:55AM -0500, Mathieu Desnoyers wrote:
> > > * K. Prasad ([email protected]) wrote:
> > > > This patch converts the tracing mechanism of Preempt RCU boosting into
> > > > markers. The handler functions for these markers are included inside
> > > > rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
> > > > chosen.
> > > >
> > > > Signed-off-by: K.Prasad <[email protected]>
> > > > ---
> > > > include/linux/rcupreempt_trace.h | 46 ++++++++
> > > > kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
> > > > kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
> > > > 3 files changed, 251 insertions(+), 189 deletions(-)
> > > >
> > > > Index: linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > > ===================================================================
> > > > --- linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW.orig/include/linux/rcupreempt_trace.h
> > > > +++ linux-2.6.24-rc2-rt1.MARKER_PATCHES_NEW/include/linux/rcupreempt_trace.h
> > > > @@ -102,5 +102,51 @@ extern int rcupreempt_flip_flag(int cpu)
> > > > extern int rcupreempt_mb_flag(int cpu);
> > > > extern char *rcupreempt_try_flip_state_name(void);
> > > >
> > > > +#ifdef CONFIG_PREEMPT_RCU_BOOST
> > > > +struct preempt_rcu_boost_trace {
> > > > + unsigned long rbs_stat_task_boost_called;
> > > > + unsigned long rbs_stat_task_boosted;
> > > > + unsigned long rbs_stat_boost_called;
> > > > + unsigned long rbs_stat_try_boost;
> > > > + unsigned long rbs_stat_boosted;
> > > > + unsigned long rbs_stat_unboost_called;
> > > > + unsigned long rbs_stat_unboosted;
> > > > + unsigned long rbs_stat_try_boost_readers;
> > > > + unsigned long rbs_stat_boost_readers;
> > > > + unsigned long rbs_stat_try_unboost_readers;
> > > > + unsigned long rbs_stat_unboost_readers;
> > > > + unsigned long rbs_stat_over_taken;
> > > > +};
> > > > +
> > > > +#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
> > > > +void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
> > > > + void *private_data, const char *format, ...) \
> > > > +{ \
> > > > + va_list ap; \
> > > > + int cpu; \
> > > > + struct preempt_rcu_boost_trace *boost_trace; \
> > > > + va_start(ap, format); \
> > > > + cpu = va_arg(ap, typeof(unsigned int)); \
> > >
> > > (sorry for late response, I just came back from vacation)
> > >
> > > Instead of passing the cpu id as a marker parameter, why don't you
> > > simply use smp_processor_id() right here ?
> > >
> >
> > Agreed.
> >
> > Also in patch 1, in cases where we're using markers in place of RCU_TRACE_ME(),
> > we need not pass the parameter and simply use smp_processor_id().
> >
> > It's only the RCU_TRACE_RDP() that requires cpuid to be passed.
> >
>
>
> And it would help if you declare the format string with something like :
>
> "cpuid %u" instead of "%u"
>
> This way, if a generic tracer like LTTng dumps the traces and later a
> plugin in an analysis tool like LTTV have to hook on a specific field of
> your event, it can access it by specifying the marker name and field
> name. And even if fields are added to this event, it won't break
> compatibility as long as this field is there and have the same name.
>
> Mathieu

Hi Mathieu,
Thanks for your suggestions.

Even in functions such as rcu_check_callbacks_rt, rcu_advance_callbacks_rt and
__rcu_advance_callbacks, where a call to RCU_TRACE_RDP() is made, the
'cpu' variable is populated at an early stage with the
return value of smp_processor_id().

__rcu_advance_callbacks() <-- rcu_check_callbacks_rt() <--
rcu_check_callbacks() <-- update_process_times().

update_process_times() [if understood correctly, is called at timer
interrupt level] populates the 'cpu' with smp_processor_id().

There doesn't seem to be a need to pass down the 'cpu' parameter for
the above RCU functions including respective trace_mark() calls.

I am re-sending the patches with your suggested changes in subsequent
mails.

Thanks,
K.Prasad

>
> >
> > Thanks and Regards
> > gautham.
> >
> >
> > > --
> > > Mathieu Desnoyers
> > > Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> > > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
> >
> > --
> > Gautham R Shenoy
> > Linux Technology Center
> > IBM India.
> > "Freedom comes with a price tag of responsibility, which is still a bargain,
> > because Freedom is priceless!"
>
> --
> Mathieu Desnoyers
> Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-01-07 19:51:21

by K.Prasad

[permalink] [raw]
Subject: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II

Hi Ingo,
Please accept these patches into the rt tree which convert the
existing RCU tracing mechanism for Preempt RCU and RCU Boost into
markers.

These patches are based upon the 2.6.24-rc5-rt1 kernel tree.

Along with marker transition, the RCU Tracing infrastructure has also
been modularised to be built as a kernel module, thereby enabling
runtime changes to the RCU Tracing infrastructure.

Patch [1/2] - Patch that converts the Preempt RCU tracing in
rcupreempt.c into markers.

Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
rcupreempt-boost.c into markers.

Thanks,
K.Prasad
([email protected])

2008-01-07 19:55:54

by K.Prasad

[permalink] [raw]
Subject: [PATCH 1/2] Markers Implementation for RCU Preempt Tracing - Ver II

This patch converts Preempt RCU Tracing code infrastructure to implement
markers.

- The rcupreempt_trace structure has been moved to the tracing
infrastructure and de-linked from the rcupreempt.c code. A per-cpu
instance of rcupreempt_trace structure will be maintained in
rcupreempt_trace.c

- The above change also renders a few macro definitions unused (such as
RCU_TRACE_CPU, RCU_TRACE_ME and RCU_TRACE_RDP) which have been
removed.

- Some of the helper functions in rcupreempt.c which were exported only
when CONFIG_RCU_TRACE was set are now exported unconditionally. These
functions operate on per-cpu variables that are used both by the RCU
and RCU Tracing code. The changes help in making RCU Tracing code
operate as a kernel module also.

- The references to rcupreempt-boost tracing in the module
initialisation and cleanup have been removed here to enable kernel
build, but will be brought in after enclosing them inside a #ifdef
CONFIG_PREEMPT_RCU_BOOST.

Signed-off-by: K.Prasad <[email protected]>
---
include/linux/rcupreempt.h | 10 ----
include/linux/rcupreempt_trace.h | 50 ++++++++++++------------
kernel/Kconfig.preempt | 7 +--
kernel/rcupreempt.c | 77 ++++++++++----------------------------
kernel/rcupreempt_trace.c | 79 +++++++++++++++++++++++++++++++++++++--
5 files changed, 125 insertions(+), 98 deletions(-)

Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/include/linux/rcupreempt.h
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/include/linux/rcupreempt.h
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/include/linux/rcupreempt.h
@@ -96,16 +96,6 @@ extern int rcu_pending_rt(int cpu);
struct softirq_action;
extern void rcu_process_callbacks_rt(struct softirq_action *unused);

-#ifdef CONFIG_RCU_TRACE
-struct rcupreempt_trace;
-extern int *rcupreempt_flipctr(int cpu);
-extern long rcupreempt_data_completed(void);
-extern int rcupreempt_flip_flag(int cpu);
-extern int rcupreempt_mb_flag(int cpu);
-extern char *rcupreempt_try_flip_state_name(void);
-extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
-#endif
-
struct softirq_action;

#ifdef CONFIG_NO_HZ
Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/include/linux/rcupreempt_trace.h
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/include/linux/rcupreempt_trace.h
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/include/linux/rcupreempt_trace.h
@@ -69,32 +69,32 @@ struct rcupreempt_trace {
long rcu_try_flip_m2;
};

-#ifdef CONFIG_RCU_TRACE
-#define RCU_TRACE(fn, arg) fn(arg);
-#else
-#define RCU_TRACE(fn, arg)
-#endif
+struct rcupreempt_probe_data {
+ const char *name;
+ const char *format;
+ marker_probe_func *probe_func;
+};
+
+#define DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_worker) \
+void rcupreempt_trace_worker##_callback(const struct marker *mdata, \
+ void *private_data, const char *format, ...) \
+{ \
+ struct rcupreempt_trace *trace; \
+ trace = (&per_cpu(trace_data, smp_processor_id())); \
+ rcupreempt_trace_worker(trace); \
+}
+
+#define INIT_RCUPREEMPT_PROBE(rcupreempt_trace_worker) \
+{ \
+ .name = __stringify(rcupreempt_trace_worker), \
+ .probe_func = rcupreempt_trace_worker##_callback \
+}

-extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace);
-extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace);
+extern int *rcupreempt_flipctr(int cpu);
+extern long rcupreempt_data_completed(void);
+extern int rcupreempt_flip_flag(int cpu);
+extern int rcupreempt_mb_flag(int cpu);
+extern char *rcupreempt_try_flip_state_name(void);

#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPREEMPT_TRACE_H */
Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt.c
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/kernel/rcupreempt.c
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt.c
@@ -54,7 +54,6 @@
#include <linux/delay.h>
#include <linux/byteorder/swabb.h>
#include <linux/cpumask.h>
-#include <linux/rcupreempt_trace.h>

/*
* PREEMPT_RCU data structures.
@@ -71,9 +70,6 @@ struct rcu_data {
struct rcu_head **waittail[GP_STAGES];
struct rcu_head *donelist;
struct rcu_head **donetail;
-#ifdef CONFIG_RCU_TRACE
- struct rcupreempt_trace trace;
-#endif /* #ifdef CONFIG_RCU_TRACE */
};
struct rcu_ctrlblk {
raw_spinlock_t fliplock; /* Protect state-machine transitions. */
@@ -97,10 +93,8 @@ enum rcu_try_flip_states {
rcu_try_flip_waitmb_state /* "M" */
};
static enum rcu_try_flip_states rcu_try_flip_state = rcu_try_flip_idle_state;
-#ifdef CONFIG_RCU_TRACE
static char *rcu_try_flip_state_names[] =
{ "idle", "waitack", "waitzero", "waitmb" };
-#endif /* #ifdef CONFIG_RCU_TRACE */

/*
* Enum and per-CPU flag to determine when each CPU has seen
@@ -147,24 +141,6 @@ static cpumask_t rcu_cpu_online_map = CP
#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu))

/*
- * Helper macro for tracing when the appropriate rcu_data is not
- * cached in a local variable, but where the CPU number is so cached.
- */
-#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace));
-
-/*
- * Helper macro for tracing when the appropriate rcu_data is not
- * cached in a local variable.
- */
-#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace));
-
-/*
- * Helper macro for tracing when the appropriate rcu_data is pointed
- * to by a local variable.
- */
-#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
-
-/*
* Return the number of RCU batches processed thus far. Useful
* for debug and statistics.
*/
@@ -332,7 +308,7 @@ static void __rcu_advance_callbacks(stru
if (rdp->waitlist[GP_STAGES - 1] != NULL) {
*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
rdp->donetail = rdp->waittail[GP_STAGES - 1];
- RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp);
+ trace_mark(rcupreempt_trace_move2done, "NULL");
}
for (i = GP_STAGES - 2; i >= 0; i--) {
if (rdp->waitlist[i] != NULL) {
@@ -351,7 +327,7 @@ static void __rcu_advance_callbacks(stru
wlc++;
rdp->nextlist = NULL;
rdp->nexttail = &rdp->nextlist;
- RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp);
+ trace_mark(rcupreempt_trace_move2wait, "NULL");
} else {
rdp->waitlist[0] = NULL;
rdp->waittail[0] = &rdp->waitlist[0];
@@ -595,9 +571,9 @@ rcu_try_flip_idle(void)
{
int cpu;

- RCU_TRACE_ME(rcupreempt_trace_try_flip_i1);
+ trace_mark(rcupreempt_trace_try_flip_i1, "NULL");
if (!rcu_pending(smp_processor_id())) {
- RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1);
+ trace_mark(rcupreempt_trace_try_flip_ie1, "NULL");
return 0;
}

@@ -605,7 +581,7 @@ rcu_try_flip_idle(void)
* Do the flip.
*/

- RCU_TRACE_ME(rcupreempt_trace_try_flip_g1);
+ trace_mark(rcupreempt_trace_try_flip_g1, "NULL");
rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */

/*
@@ -635,11 +611,11 @@ rcu_try_flip_waitack(void)
{
int cpu;

- RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
+ trace_mark(rcupreempt_trace_try_flip_a1, "NULL");
for_each_cpu_mask(cpu, rcu_cpu_online_map)
if (rcu_try_flip_waitack_needed(cpu) &&
per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
- RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
+ trace_mark(rcupreempt_trace_try_flip_ae1, "NULL");
return 0;
}

@@ -649,7 +625,7 @@ rcu_try_flip_waitack(void)
*/

smp_mb(); /* see above block comment. */
- RCU_TRACE_ME(rcupreempt_trace_try_flip_a2);
+ trace_mark(rcupreempt_trace_try_flip_a2, "NULL");
return 1;
}

@@ -667,11 +643,11 @@ rcu_try_flip_waitzero(void)

/* Check to see if the sum of the "last" counters is zero. */

- RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
+ trace_mark(rcupreempt_trace_try_flip_z1, "NULL");
for_each_possible_cpu(cpu)
sum += per_cpu(rcu_flipctr, cpu)[lastidx];
if (sum != 0) {
- RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
+ trace_mark(rcupreempt_trace_try_flip_ze1, "NULL");
return 0;
}

@@ -684,7 +660,7 @@ rcu_try_flip_waitzero(void)
dyntick_save_progress_counter(cpu);
}

- RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
+ trace_mark(rcupreempt_trace_try_flip_z2, "NULL");
return 1;
}

@@ -698,16 +674,16 @@ rcu_try_flip_waitmb(void)
{
int cpu;

- RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
+ trace_mark(rcupreempt_trace_try_flip_m1, "NULL");
for_each_cpu_mask(cpu, rcu_cpu_online_map)
if (rcu_try_flip_waitmb_needed(cpu) &&
per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
- RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
+ trace_mark(rcupreempt_trace_try_flip_me1, "NULL");
return 0;
}

smp_mb(); /* Ensure that the above checks precede any following flip. */
- RCU_TRACE_ME(rcupreempt_trace_try_flip_m2);
+ trace_mark(rcupreempt_trace_try_flip_m2, "NULL");
return 1;
}

@@ -724,9 +700,9 @@ static void rcu_try_flip(void)
{
unsigned long oldirq;

- RCU_TRACE_ME(rcupreempt_trace_try_flip_1);
+ trace_mark(rcupreempt_trace_try_flip_1, "NULL");
if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, oldirq))) {
- RCU_TRACE_ME(rcupreempt_trace_try_flip_e1);
+ trace_mark(rcupreempt_trace_try_flip_e1, "NULL");
return;
}

@@ -778,7 +754,7 @@ void rcu_check_callbacks_rt(int cpu, int
if (rcu_ctrlblk.completed == rdp->completed)
rcu_try_flip();
spin_lock_irqsave(&rdp->lock, oldirq);
- RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
+ trace_mark(rcupreempt_trace_check_callbacks, "NULL");
__rcu_advance_callbacks(rdp);
spin_unlock_irqrestore(&rdp->lock, oldirq);
}
@@ -798,7 +774,7 @@ void rcu_advance_callbacks_rt(int cpu, i
return;
}
spin_lock_irqsave(&rdp->lock, oldirq);
- RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp);
+ trace_mark(rcupreempt_trace_check_callbacks, "NULL");
__rcu_advance_callbacks(rdp);
spin_unlock_irqrestore(&rdp->lock, oldirq);
}
@@ -900,13 +876,13 @@ void rcu_process_callbacks_rt(struct sof
}
rdp->donelist = NULL;
rdp->donetail = &rdp->donelist;
- RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp);
+ trace_mark(rcupreempt_trace_done_remove, "NULL");
spin_unlock_irqrestore(&rdp->lock, flags);
while (list) {
next = list->next;
list->func(list);
list = next;
- RCU_TRACE_ME(rcupreempt_trace_invoke);
+ trace_mark(rcupreempt_trace_invoke, "NULL");
}
}

@@ -924,7 +900,7 @@ void fastcall call_rcu_preempt(struct rc
__rcu_advance_callbacks(rdp);
*rdp->nexttail = head;
rdp->nexttail = &head->next;
- RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
+ trace_mark(rcupreempt_trace_next_add, "NULL");
spin_unlock(&rdp->lock);
local_irq_restore(oldirq);
}
@@ -1007,7 +983,6 @@ void synchronize_kernel(void)
synchronize_rcu();
}

-#ifdef CONFIG_RCU_TRACE
int *rcupreempt_flipctr(int cpu)
{
return &per_cpu(rcu_flipctr, cpu)[0];
@@ -1031,13 +1006,3 @@ char *rcupreempt_try_flip_state_name(voi
return rcu_try_flip_state_names[rcu_try_flip_state];
}
EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name);
-
-struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
-{
- struct rcu_data *rdp = RCU_DATA_CPU(cpu);
-
- return &rdp->trace;
-}
-EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu);
-
-#endif /* #ifdef RCU_TRACE */
Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt_trace.c
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/kernel/rcupreempt_trace.c
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt_trace.c
@@ -43,11 +43,19 @@
#include <linux/mutex.h>
#include <linux/rcupreempt_trace.h>
#include <linux/debugfs.h>
+#include <linux/percpu.h>

static struct mutex rcupreempt_trace_mutex;
static char *rcupreempt_trace_buf;
#define RCUPREEMPT_TRACE_BUF_SIZE 4096

+static DEFINE_PER_CPU(struct rcupreempt_trace, trace_data);
+
+struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
+{
+ return &per_cpu(trace_data, cpu);
+}
+
void rcupreempt_trace_move2done(struct rcupreempt_trace *trace)
{
trace->done_length += trace->wait_length;
@@ -135,6 +143,51 @@ void rcupreempt_trace_next_add(struct rc
trace->next_length++;
}

+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_move2done);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_move2wait);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_e1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_i1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_ie1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_g1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_a1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_ae1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_a2);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_z1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_ze1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_z2);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_m1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_me1);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_try_flip_m2);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_check_callbacks);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_done_remove);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_invoke);
+DEFINE_RCUPREEMPT_MARKER_HANDLER(rcupreempt_trace_next_add);
+
+static struct rcupreempt_probe_data rcupreempt_probe_array[] =
+{
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_move2done),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_move2wait),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_e1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_i1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_ie1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_g1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_a1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_ae1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_a2),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_z1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_ze1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_z2),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_m1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_me1),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_try_flip_m2),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_check_callbacks),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_done_remove),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_invoke),
+ INIT_RCUPREEMPT_PROBE(rcupreempt_trace_next_add)
+};
+
static void rcupreempt_trace_sum(struct rcupreempt_trace *sp)
{
struct rcupreempt_trace *cp;
@@ -297,9 +350,6 @@ static int rcupreempt_debugfs_init(void)
if (!ctrsdir)
goto free_out;

- if (!rcu_trace_boost_create(rcudir))
- goto free_out;
-
return 0;
free_out:
if (ctrsdir)
@@ -316,6 +366,21 @@ out:
static int __init rcupreempt_trace_init(void)
{
int ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rcupreempt_probe_array); i++) {
+ struct rcupreempt_probe_data *p = &rcupreempt_probe_array[i];
+ ret = marker_probe_register(p->name, p->format,
+ p->probe_func, p);
+ if (ret)
+ printk(KERN_INFO "Unable to register rcupreempt \
+ probe %s\n", rcupreempt_probe_array[i].name);
+ ret = marker_arm(p->name);
+ if (ret)
+ printk(KERN_INFO "Unable to arm rcupreempt probe %s\n",
+ p->name);
+ }
+ printk(KERN_INFO "RCU Preempt markers registered\n");

mutex_init(&rcupreempt_trace_mutex);
rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
@@ -329,7 +394,12 @@ static int __init rcupreempt_trace_init(

static void __exit rcupreempt_trace_cleanup(void)
{
- rcu_trace_boost_destroy();
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rcupreempt_probe_array); i++)
+ marker_probe_unregister(rcupreempt_probe_array[i].name);
+ printk(KERN_INFO "RCU Preempt markers unregistered\n");
+
debugfs_remove(statdir);
debugfs_remove(gpdir);
debugfs_remove(ctrsdir);
@@ -337,6 +407,7 @@ static void __exit rcupreempt_trace_clea
kfree(rcupreempt_trace_buf);
}

+MODULE_LICENSE("GPL");

module_init(rcupreempt_trace_init);
module_exit(rcupreempt_trace_cleanup);
Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/Kconfig.preempt
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/kernel/Kconfig.preempt
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/Kconfig.preempt
@@ -172,14 +172,15 @@ config PREEMPT_RCU_BOOST
possible OOM problems.

config RCU_TRACE
- bool "Enable tracing for RCU - currently stats in debugfs"
+ tristate "Enable tracing for RCU - currently stats in debugfs"
select DEBUG_FS
- default y
+ select MARKERS
+ default m
help
This option provides tracing in RCU which presents stats
in debugfs for debugging RCU implementation.

- Say Y here if you want to enable RCU tracing
+ Say Y/M here if you want to enable RCU tracing in-kernel/module.
Say N if you are unsure.

config SPINLOCK_BKL

2008-01-07 19:57:22

by K.Prasad

[permalink] [raw]
Subject: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing - Ver II

This patch converts the tracing mechanism of Preempt RCU boosting into
markers. The handler functions for these markers are included inside
rcupreempt_trace.c and will be included only when PREEMPT_RCU_BOOST is
chosen.

Signed-off-by: K.Prasad <[email protected]>
---
include/linux/rcupreempt_trace.h | 40 +++++++
kernel/rcupreempt-boost.c | 211 ++++-----------------------------------
kernel/rcupreempt_trace.c | 183 +++++++++++++++++++++++++++++++++
3 files changed, 245 insertions(+), 189 deletions(-)

Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/include/linux/rcupreempt_trace.h
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/include/linux/rcupreempt_trace.h
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/include/linux/rcupreempt_trace.h
@@ -95,5 +95,45 @@ extern int rcupreempt_flip_flag(int cpu)
extern int rcupreempt_mb_flag(int cpu);
extern char *rcupreempt_try_flip_state_name(void);

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+struct preempt_rcu_boost_trace {
+ unsigned long rbs_stat_task_boost_called;
+ unsigned long rbs_stat_task_boosted;
+ unsigned long rbs_stat_boost_called;
+ unsigned long rbs_stat_try_boost;
+ unsigned long rbs_stat_boosted;
+ unsigned long rbs_stat_unboost_called;
+ unsigned long rbs_stat_unboosted;
+ unsigned long rbs_stat_try_boost_readers;
+ unsigned long rbs_stat_boost_readers;
+ unsigned long rbs_stat_try_unboost_readers;
+ unsigned long rbs_stat_unboost_readers;
+ unsigned long rbs_stat_over_taken;
+};
+
+#define DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(preempt_rcu_boost_var) \
+void preempt_rcu_boost_var##_callback(const struct marker *mdata, \
+ void *private_data, const char *format, ...) \
+{ \
+ struct preempt_rcu_boost_trace *boost_trace; \
+ boost_trace = (&per_cpu(boost_trace_data, smp_processor_id())); \
+ boost_trace->rbs_stat_##preempt_rcu_boost_var++; \
+}
+
+struct preempt_rcu_boost_probe {
+ const char *name;
+ const char *format;
+ marker_probe_func *probe_func;
+};
+
+#define INIT_PREEMPT_RCU_BOOST_PROBE(preempt_rcu_boost_probe_worker) \
+{ \
+ .name = __stringify(preempt_rcu_boost_probe_worker), \
+ .probe_func = preempt_rcu_boost_probe_worker##_callback \
+}
+
+extern int read_rcu_boost_prio(void);
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
#endif /* __KERNEL__ */
#endif /* __LINUX_RCUPREEMPT_TRACE_H */
Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt_trace.c
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/kernel/rcupreempt_trace.c
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt_trace.c
@@ -51,6 +51,163 @@ static char *rcupreempt_trace_buf;

static DEFINE_PER_CPU(struct rcupreempt_trace, trace_data);

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
+static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
+static DEFINE_PER_CPU(struct preempt_rcu_boost_trace, boost_trace_data);
+
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(task_boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_called);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboosted);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(boost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(try_unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(unboost_readers);
+DEFINE_PREEMPT_RCU_BOOST_MARKER_HANDLER(over_taken);
+
+static struct preempt_rcu_boost_probe preempt_rcu_boost_probe_array[] =
+{
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(task_boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_called),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboosted),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(boost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(try_unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(unboost_readers),
+ INIT_PREEMPT_RCU_BOOST_PROBE(over_taken)
+};
+
+static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ static DEFINE_MUTEX(mutex);
+ int cnt = 0;
+ int cpu;
+ struct preempt_rcu_boost_trace *prbt;
+ ssize_t bcount;
+ unsigned long task_boost_called = 0;
+ unsigned long task_boosted = 0;
+ unsigned long boost_called = 0;
+ unsigned long try_boost = 0;
+ unsigned long boosted = 0;
+ unsigned long unboost_called = 0;
+ unsigned long unboosted = 0;
+ unsigned long try_boost_readers = 0;
+ unsigned long boost_readers = 0;
+ unsigned long try_unboost_readers = 0;
+ unsigned long unboost_readers = 0;
+ unsigned long over_taken = 0;
+
+ mutex_lock(&mutex);
+
+ for_each_online_cpu(cpu) {
+ prbt = &per_cpu(boost_trace_data, cpu);
+
+ task_boost_called += prbt->rbs_stat_task_boost_called;
+ task_boosted += prbt->rbs_stat_task_boosted;
+ boost_called += prbt->rbs_stat_boost_called;
+ try_boost += prbt->rbs_stat_try_boost;
+ boosted += prbt->rbs_stat_boosted;
+ unboost_called += prbt->rbs_stat_unboost_called;
+ unboosted += prbt->rbs_stat_unboosted;
+ try_boost_readers += prbt->rbs_stat_try_boost_readers;
+ boost_readers += prbt->rbs_stat_boost_readers;
+ try_unboost_readers += prbt->rbs_stat_try_boost_readers;
+ unboost_readers += prbt->rbs_stat_boost_readers;
+ over_taken += prbt->rbs_stat_over_taken;
+ }
+
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boost_called = %ld\n",
+ task_boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "task_boosted = %ld\n",
+ task_boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_called = %ld\n",
+ boost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost = %ld\n",
+ try_boost);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boosted = %ld\n",
+ boosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_called = %ld\n",
+ unboost_called);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboosted = %ld\n",
+ unboosted);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_boost_readers = %ld\n",
+ try_boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "boost_readers = %ld\n",
+ boost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "try_unboost_readers = %ld\n",
+ try_unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "unboost_readers = %ld\n",
+ unboost_readers);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "over_taken = %ld\n",
+ over_taken);
+ cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
+ RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
+ "rcu_boost_prio = %d\n",
+ read_rcu_boost_prio());
+ bcount = simple_read_from_buffer(buffer, count, ppos,
+ rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
+ mutex_unlock(&mutex);
+
+ return bcount;
+}
+
+static struct file_operations rcuboost_fops = {
+ .read = rcuboost_read,
+};
+
+static struct dentry *rcuboostdir;
+int rcu_trace_boost_create(struct dentry *rcudir)
+{
+ rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
+ NULL, &rcuboost_fops);
+ if (!rcuboostdir)
+ return 0;
+
+ return 1;
+}
+
+void rcu_trace_boost_destroy(void)
+{
+ if (rcuboostdir)
+ debugfs_remove(rcuboostdir);
+ rcuboostdir = NULL;
+}
+
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu)
{
return &per_cpu(trace_data, cpu);
@@ -350,6 +507,10 @@ static int rcupreempt_debugfs_init(void)
if (!ctrsdir)
goto free_out;

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ if (!rcu_trace_boost_create(rcudir))
+ goto free_out;
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
return 0;
free_out:
if (ctrsdir)
@@ -382,6 +543,22 @@ static int __init rcupreempt_trace_init(
}
printk(KERN_INFO "RCU Preempt markers registered\n");

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++) {
+ struct preempt_rcu_boost_probe *p = \
+ &preempt_rcu_boost_probe_array[i];
+ ret = marker_probe_register(p->name, p->format,
+ p->probe_func, p);
+ if (ret)
+ printk(KERN_INFO "Unable to register Preempt RCU Boost \
+ probe %s\n", preempt_rcu_boost_probe_array[i].name);
+ ret = marker_arm(p->name);
+ if (ret)
+ printk(KERN_INFO "Unable to arm Preempt RCU Boost \
+ markers %s\n", p->name);
+}
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
+
mutex_init(&rcupreempt_trace_mutex);
rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
if (!rcupreempt_trace_buf)
@@ -400,6 +577,12 @@ static void __exit rcupreempt_trace_clea
marker_probe_unregister(rcupreempt_probe_array[i].name);
printk(KERN_INFO "RCU Preempt markers unregistered\n");

+#ifdef CONFIG_PREEMPT_RCU_BOOST
+ rcu_trace_boost_destroy();
+ for (i = 0; i < ARRAY_SIZE(preempt_rcu_boost_probe_array); i++)
+ marker_probe_unregister(preempt_rcu_boost_probe_array[i].name);
+ printk(KERN_INFO "Preempt RCU Boost markers unregistered\n");
+#endif /* CONFIG_PREEMPT_RCU_BOOST */
debugfs_remove(statdir);
debugfs_remove(gpdir);
debugfs_remove(ctrsdir);
Index: linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt-boost.c
===================================================================
--- linux-2.6.24-rc5-rt1.RCU_MARKERS.orig/kernel/rcupreempt-boost.c
+++ linux-2.6.24-rc5-rt1.RCU_MARKERS/kernel/rcupreempt-boost.c
@@ -40,186 +40,9 @@ struct rcu_boost_dat {
int rbs_prio; /* CPU copy of rcu_boost_prio */
struct list_head rbs_toboost; /* Preempted RCU readers */
struct list_head rbs_boosted; /* RCU readers that have been boosted */
-#ifdef CONFIG_RCU_TRACE
- /* The rest are for statistics */
- unsigned long rbs_stat_task_boost_called;
- unsigned long rbs_stat_task_boosted;
- unsigned long rbs_stat_boost_called;
- unsigned long rbs_stat_try_boost;
- unsigned long rbs_stat_boosted;
- unsigned long rbs_stat_unboost_called;
- unsigned long rbs_stat_unboosted;
- unsigned long rbs_stat_try_boost_readers;
- unsigned long rbs_stat_boost_readers;
- unsigned long rbs_stat_try_unboost_readers;
- unsigned long rbs_stat_unboost_readers;
- unsigned long rbs_stat_over_taken;
-#endif /* CONFIG_RCU_TRACE */
};

static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_data);
-#define RCU_BOOST_ME &__get_cpu_var(rcu_boost_data)
-
-#ifdef CONFIG_RCU_TRACE
-
-#define RCUPREEMPT_BOOST_TRACE_BUF_SIZE 4096
-static char rcupreempt_boost_trace_buf[RCUPREEMPT_BOOST_TRACE_BUF_SIZE];
-
-static ssize_t rcuboost_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- static DEFINE_MUTEX(mutex);
- int cnt = 0;
- int cpu;
- struct rcu_boost_dat *rbd;
- ssize_t bcount;
- unsigned long task_boost_called = 0;
- unsigned long task_boosted = 0;
- unsigned long boost_called = 0;
- unsigned long try_boost = 0;
- unsigned long boosted = 0;
- unsigned long unboost_called = 0;
- unsigned long unboosted = 0;
- unsigned long try_boost_readers = 0;
- unsigned long boost_readers = 0;
- unsigned long try_unboost_readers = 0;
- unsigned long unboost_readers = 0;
- unsigned long over_taken = 0;
-
- mutex_lock(&mutex);
-
- for_each_online_cpu(cpu) {
- rbd = &per_cpu(rcu_boost_data, cpu);
-
- task_boost_called += rbd->rbs_stat_task_boost_called;
- task_boosted += rbd->rbs_stat_task_boosted;
- boost_called += rbd->rbs_stat_boost_called;
- try_boost += rbd->rbs_stat_try_boost;
- boosted += rbd->rbs_stat_boosted;
- unboost_called += rbd->rbs_stat_unboost_called;
- unboosted += rbd->rbs_stat_unboosted;
- try_boost_readers += rbd->rbs_stat_try_boost_readers;
- boost_readers += rbd->rbs_stat_boost_readers;
- try_unboost_readers += rbd->rbs_stat_try_boost_readers;
- unboost_readers += rbd->rbs_stat_boost_readers;
- over_taken += rbd->rbs_stat_over_taken;
- }
-
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boost_called = %ld\n",
- task_boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "task_boosted = %ld\n",
- task_boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_called = %ld\n",
- boost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost = %ld\n",
- try_boost);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boosted = %ld\n",
- boosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_called = %ld\n",
- unboost_called);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboosted = %ld\n",
- unboosted);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_boost_readers = %ld\n",
- try_boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "boost_readers = %ld\n",
- boost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "try_unboost_readers = %ld\n",
- try_unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "unboost_readers = %ld\n",
- unboost_readers);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "over_taken = %ld\n",
- over_taken);
- cnt += snprintf(&rcupreempt_boost_trace_buf[cnt],
- RCUPREEMPT_BOOST_TRACE_BUF_SIZE - cnt,
- "rcu_boost_prio = %d\n",
- rcu_boost_prio);
- bcount = simple_read_from_buffer(buffer, count, ppos,
- rcupreempt_boost_trace_buf, strlen(rcupreempt_boost_trace_buf));
- mutex_unlock(&mutex);
-
- return bcount;
-}
-
-static struct file_operations rcuboost_fops = {
- .read = rcuboost_read,
-};
-
-static struct dentry *rcuboostdir;
-int rcu_trace_boost_create(struct dentry *rcudir)
-{
- rcuboostdir = debugfs_create_file("rcuboost", 0444, rcudir,
- NULL, &rcuboost_fops);
- if (!rcuboostdir)
- return 0;
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_create);
-
-void rcu_trace_boost_destroy(void)
-{
- if (rcuboostdir)
- debugfs_remove(rcuboostdir);
- rcuboostdir = NULL;
-}
-EXPORT_SYMBOL_GPL(rcu_trace_boost_destroy);
-
-#define RCU_BOOST_TRACE_FUNC_DECL(type) \
- static void rcu_trace_boost_##type(struct rcu_boost_dat *rbd) \
- { \
- rbd->rbs_stat_##type++; \
- }
-RCU_BOOST_TRACE_FUNC_DECL(task_boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(task_boosted)
-RCU_BOOST_TRACE_FUNC_DECL(boost_called)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost)
-RCU_BOOST_TRACE_FUNC_DECL(boosted)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_called)
-RCU_BOOST_TRACE_FUNC_DECL(unboosted)
-RCU_BOOST_TRACE_FUNC_DECL(try_boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(boost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(try_unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(unboost_readers)
-RCU_BOOST_TRACE_FUNC_DECL(over_taken)
-#else /* CONFIG_RCU_TRACE */
-/* These were created by the above macro "RCU_BOOST_TRACE_FUNC_DECL" */
-# define rcu_trace_boost_task_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_task_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_boost_called(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost(rbd) do { } while (0)
-# define rcu_trace_boost_boosted(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_called(rbd) do { } while (0)
-# define rcu_trace_boost_unboosted(rbd) do { } while (0)
-# define rcu_trace_boost_try_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_boost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_try_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_unboost_readers(rbd) do { } while (0)
-# define rcu_trace_boost_over_taken(rbd) do { } while (0)
-#endif /* CONFIG_RCU_TRACE */

static inline int rcu_is_boosted(struct task_struct *task)
{
@@ -234,10 +57,10 @@ static void rcu_boost_task(struct task_s
WARN_ON(!irqs_disabled());
WARN_ON_SMP(!spin_is_locked(&task->pi_lock));

- rcu_trace_boost_task_boost_called(RCU_BOOST_ME);
+ trace_mark(task_boost_called, "NULL");

if (task->rcu_prio < task->prio) {
- rcu_trace_boost_task_boosted(RCU_BOOST_ME);
+ trace_mark(task_boosted, "NULL");
task_setprio(task, task->rcu_prio);
}
}
@@ -261,7 +84,7 @@ void __rcu_preempt_boost(void)

WARN_ON(!current->rcu_read_lock_nesting);

- rcu_trace_boost_boost_called(RCU_BOOST_ME);
+ trace_mark(boost_called, "NULL");

/* check to see if we are already boosted */
if (unlikely(rcu_is_boosted(curr)))
@@ -279,7 +102,7 @@ void __rcu_preempt_boost(void)

curr->rcub_rbdp = rbd;

- rcu_trace_boost_try_boost(rbd);
+ trace_mark(try_boost, "NULL");

prio = rt_mutex_getprio(curr);

@@ -288,7 +111,7 @@ void __rcu_preempt_boost(void)
if (prio <= rbd->rbs_prio)
goto out;

- rcu_trace_boost_boosted(curr->rcub_rbdp);
+ trace_mark(boosted, "NULL");

curr->rcu_prio = rbd->rbs_prio;
rcu_boost_task(curr);
@@ -313,7 +136,7 @@ void __rcu_preempt_unboost(void)
int prio;
unsigned long flags;

- rcu_trace_boost_unboost_called(RCU_BOOST_ME);
+ trace_mark(unboost_called, "NULL");

/* if not boosted, then ignore */
if (likely(!rcu_is_boosted(curr)))
@@ -351,7 +174,7 @@ void __rcu_preempt_unboost(void)

list_del_init(&curr->rcub_entry);

- rcu_trace_boost_unboosted(rbd);
+ trace_mark(unboosted, "NULL");

curr->rcu_prio = MAX_PRIO;

@@ -412,7 +235,7 @@ static int __rcu_boost_readers(struct rc
* Another task may have taken over.
*/
if (curr->rcu_preempt_counter != rcu_boost_counter) {
- rcu_trace_boost_over_taken(rbd);
+ trace_mark(over_taken, "NULL");
return 1;
}

@@ -443,7 +266,7 @@ void rcu_boost_readers(void)

prio = rt_mutex_getprio(curr);

- rcu_trace_boost_try_boost_readers(RCU_BOOST_ME);
+ trace_mark(try_boost_readers, "NULL");

if (prio >= rcu_boost_prio) {
/* already boosted */
@@ -453,7 +276,7 @@ void rcu_boost_readers(void)

rcu_boost_prio = prio;

- rcu_trace_boost_boost_readers(RCU_BOOST_ME);
+ trace_mark(boost_readers, "NULL");

/* Flag that we are the one to unboost */
curr->rcu_preempt_counter = ++rcu_boost_counter;
@@ -486,12 +309,12 @@ void rcu_unboost_readers(void)

spin_lock_irqsave(&rcu_boost_wake_lock, flags);

- rcu_trace_boost_try_unboost_readers(RCU_BOOST_ME);
+ trace_mark(try_unboost_readers, "NULL");

if (current->rcu_preempt_counter != rcu_boost_counter)
goto out;

- rcu_trace_boost_unboost_readers(RCU_BOOST_ME);
+ trace_mark(unboost_readers, "NULL");

/*
* We could also put in something that
@@ -514,6 +337,16 @@ void rcu_unboost_readers(void)
}

/*
+ * This function exports the rcu_boost_prio variable for use by
+ * modules that need it e.g. RCU_TRACE module
+ */
+int read_rcu_boost_prio(void)
+{
+ return rcu_boost_prio;
+}
+EXPORT_SYMBOL_GPL(read_rcu_boost_prio);
+
+/*
* The krcupreemptd wakes up every "rcu_preempt_thread_secs"
* seconds at the minimum priority of 1 to do a
* synchronize_rcu. This ensures that grace periods finish

2008-01-14 14:37:48

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

On Mon 2008-01-07 13:59:54, Mathieu Desnoyers wrote:
> * Ingo Molnar ([email protected]) wrote:
> >
> > * Frank Ch. Eigler <[email protected]> wrote:
> >
> > > > [...] this is a general policy matter. It is _so much easier_ to add
> > > > markers if they _can_ have near-zero overhead (as in 1-2
> > > > instructions). Otherwise we'll keep arguing about it, especially if
> > > > any is added to performance-critical codepath. (where we are
> > > > counting instructions)
> > >
> > > The effect of the immediate-values patch, combined with gcc
> > > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> > > dcache-impact-free instructions. The register saves, parameter
> > > evaluation, the function call, can all be moved out of line.
> >
> > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> > we should already be getting that, right?
> >
> > There's one thing that would make out-of-line tracepoints have a lot
> > less objectionable to me: right now the 'out of line' area is put to the
> > end of functions. That splinters the kernel image with inactive, rarely
> > taken areas of code - blowing up its icache footprint considerably. For
> > example sched.o has ~100 functions, with the average function size being
> > 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
> > already.
>
> Hrm, I agree this can be a problem on architectures with more standard
> associative icaches, but aren't most x86_64 machines (and modern x86_32)
> using an instruction trace cache instead ? This makes the problem
> irrelevant.
>
> But I agree that, as Frank proposed, -freorder-blocks-and-partition
> could help us in that matter for the architectures using an associative
> L1 icache.

I thought trace cache died with P4?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-01-14 15:35:25

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

* Pavel Machek ([email protected]) wrote:
> On Mon 2008-01-07 13:59:54, Mathieu Desnoyers wrote:
> > * Ingo Molnar ([email protected]) wrote:
> > >
> > > * Frank Ch. Eigler <[email protected]> wrote:
> > >
> > > > > [...] this is a general policy matter. It is _so much easier_ to add
> > > > > markers if they _can_ have near-zero overhead (as in 1-2
> > > > > instructions). Otherwise we'll keep arguing about it, especially if
> > > > > any is added to performance-critical codepath. (where we are
> > > > > counting instructions)
> > > >
> > > > The effect of the immediate-values patch, combined with gcc
> > > > CFLAGS+=-freorder-blocks, *is* to keep the overhead at 1-2
> > > > dcache-impact-free instructions. The register saves, parameter
> > > > evaluation, the function call, can all be moved out of line.
> > >
> > > well, -freorder-blocks seems to be default-enabled at -O2 on gcc 4.2, so
> > > we should already be getting that, right?
> > >
> > > There's one thing that would make out-of-line tracepoints have a lot
> > > less objectionable to me: right now the 'out of line' area is put to the
> > > end of functions. That splinters the kernel image with inactive, rarely
> > > taken areas of code - blowing up its icache footprint considerably. For
> > > example sched.o has ~100 functions, with the average function size being
> > > 200 bytes. At 64 bytes L1 cacheline size that's a 10-20% icache waste
> > > already.
> >
> > Hrm, I agree this can be a problem on architectures with more standard
> > associative icaches, but aren't most x86_64 machines (and modern x86_32)
> > using an instruction trace cache instead ? This makes the problem
> > irrelevant.
> >
> > But I agree that, as Frank proposed, -freorder-blocks-and-partition
> > could help us in that matter for the architectures using an associative
> > L1 icache.
>
> I thought trace cache died with P4?
> --

And you are absolutely right.

We would have to figure out if enabling -freorder-blocks-and-partition
makes sense kernel-wide.

Mathieu


--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-01-14 16:32:39

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing



On Mon, 14 Jan 2008, Mathieu Desnoyers wrote:
>
> We would have to figure out if enabling -freorder-blocks-and-partition
> makes sense kernel-wide.

Last I saw, it generates crappy code, with lots more jumps back and forth,
and the image just blows up.

There's a reason we use -Os, and that's that small footprint I$ is
generally more important than fake compiler optimizations that don't
actually help except on microbenchmarks where everything fits in the
cache.

Taking a branch instruction from two bytes to five is almost always a
mistake, unless you *know* that the code it jumps to will effectively
never be done at all (which is not necessarily the case at all). It also
makes debugging much nastier, because if now things like backtraces
probably look like crap too!

Don't go there. The *best* we can do is to just use the optimizations that
generate good-looking code that humans can read. The rest is just compiler
masturbation.

Linus

2008-01-14 19:42:21

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 2/2] Markers Implementation for Preempt RCU Boost Tracing

* Linus Torvalds ([email protected]) wrote:
>
>
> On Mon, 14 Jan 2008, Mathieu Desnoyers wrote:
> >
> > We would have to figure out if enabling -freorder-blocks-and-partition
> > makes sense kernel-wide.
>
> Last I saw, it generates crappy code, with lots more jumps back and forth,
> and the image just blows up.
>
> There's a reason we use -Os, and that's that small footprint I$ is
> generally more important than fake compiler optimizations that don't
> actually help except on microbenchmarks where everything fits in the
> cache.
>
> Taking a branch instruction from two bytes to five is almost always a
> mistake, unless you *know* that the code it jumps to will effectively
> never be done at all (which is not necessarily the case at all). It also
> makes debugging much nastier, because if now things like backtraces
> probably look like crap too!
>
> Don't go there. The *best* we can do is to just use the optimizations that
> generate good-looking code that humans can read. The rest is just compiler
> masturbation.
>

I agree that turning this flag on does not seem like an interesting
solution.

Well, I wonder how important this issue of not sharing L1 instruction
cachelines with scheduler code is. If we care as much about it as Ingo
states, I wonder why we leave about 22 BUG() macros in sched.c
(calculated from the number of ud2 instructions generated on x86), which
adds up to 7 bytes at the end of many scheduler functions (7 bytes
coming from ud2, jmp and .p2align on x86).

And 22 markers in sched.c is already much more than needed. I actually
propose only 5 in my patchset.

Mathieu

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-02-18 12:22:41

by Jan Kiszka

[permalink] [raw]
Subject: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II

K. Prasad wrote:
> Hi Ingo,
> Please accept these patches into the rt tree which convert the
> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> markers.
>
> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
>
> Along with marker transition, the RCU Tracing infrastructure has also
> been modularised to be built as a kernel module, thereby enabling
> runtime changes to the RCU Tracing infrastructure.
>
> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> rcupreempt.c into markers.
>
> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> rcupreempt-boost.c into markers.
>
> Thanks,
> K.Prasad
> ([email protected])

The correct marker annotation for "no arguments" is MARK_NOARGS.

Signed-off-by: Jan Kiszka <[email protected]>

---
kernel/rcupreempt-boost.c | 24 ++++++++++++------------
kernel/rcupreempt.c | 42 +++++++++++++++++++++---------------------
2 files changed, 33 insertions(+), 33 deletions(-)

Index: b/kernel/rcupreempt-boost.c
===================================================================
--- a/kernel/rcupreempt-boost.c
+++ b/kernel/rcupreempt-boost.c
@@ -57,10 +57,10 @@ static void rcu_boost_task(struct task_s
WARN_ON(!irqs_disabled());
WARN_ON_SMP(!spin_is_locked(&task->pi_lock));

- trace_mark(task_boost_called, "NULL");
+ trace_mark(task_boost_called, MARK_NOARGS);

if (task->rcu_prio < task->prio) {
- trace_mark(task_boosted, "NULL");
+ trace_mark(task_boosted, MARK_NOARGS);
task_setprio(task, task->rcu_prio);
}
}
@@ -84,7 +84,7 @@ void __rcu_preempt_boost(void)

WARN_ON(!current->rcu_read_lock_nesting);

- trace_mark(boost_called, "NULL");
+ trace_mark(boost_called, MARK_NOARGS);

/* check to see if we are already boosted */
if (unlikely(rcu_is_boosted(curr)))
@@ -102,7 +102,7 @@ void __rcu_preempt_boost(void)

curr->rcub_rbdp = rbd;

- trace_mark(try_boost, "NULL");
+ trace_mark(try_boost, MARK_NOARGS);

prio = rt_mutex_getprio(curr);

@@ -111,7 +111,7 @@ void __rcu_preempt_boost(void)
if (prio <= rbd->rbs_prio)
goto out;

- trace_mark(boosted, "NULL");
+ trace_mark(boosted, MARK_NOARGS);

curr->rcu_prio = rbd->rbs_prio;
rcu_boost_task(curr);
@@ -136,7 +136,7 @@ void __rcu_preempt_unboost(void)
int prio;
unsigned long flags;

- trace_mark(unboost_called, "NULL");
+ trace_mark(unboost_called, MARK_NOARGS);

/* if not boosted, then ignore */
if (likely(!rcu_is_boosted(curr)))
@@ -174,7 +174,7 @@ void __rcu_preempt_unboost(void)

list_del_init(&curr->rcub_entry);

- trace_mark(unboosted, "NULL");
+ trace_mark(unboosted, MARK_NOARGS);

curr->rcu_prio = MAX_PRIO;

@@ -235,7 +235,7 @@ static int __rcu_boost_readers(struct rc
* Another task may have taken over.
*/
if (curr->rcu_preempt_counter != rcu_boost_counter) {
- trace_mark(over_taken, "NULL");
+ trace_mark(over_taken, MARK_NOARGS);
return 1;
}

@@ -266,7 +266,7 @@ void rcu_boost_readers(void)

prio = rt_mutex_getprio(curr);

- trace_mark(try_boost_readers, "NULL");
+ trace_mark(try_boost_readers, MARK_NOARGS);

if (prio >= rcu_boost_prio) {
/* already boosted */
@@ -276,7 +276,7 @@ void rcu_boost_readers(void)

rcu_boost_prio = prio;

- trace_mark(boost_readers, "NULL");
+ trace_mark(boost_readers, MARK_NOARGS);

/* Flag that we are the one to unboost */
curr->rcu_preempt_counter = ++rcu_boost_counter;
@@ -309,12 +309,12 @@ void rcu_unboost_readers(void)

spin_lock_irqsave(&rcu_boost_wake_lock, flags);

- trace_mark(try_unboost_readers, "NULL");
+ trace_mark(try_unboost_readers, MARK_NOARGS);

if (current->rcu_preempt_counter != rcu_boost_counter)
goto out;

- trace_mark(unboost_readers, "NULL");
+ trace_mark(unboost_readers, MARK_NOARGS);

/*
* We could also put in something that
Index: b/kernel/rcupreempt.c
===================================================================
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -308,7 +308,7 @@ static void __rcu_advance_callbacks(stru
if (rdp->waitlist[GP_STAGES - 1] != NULL) {
*rdp->donetail = rdp->waitlist[GP_STAGES - 1];
rdp->donetail = rdp->waittail[GP_STAGES - 1];
- trace_mark(rcupreempt_trace_move2done, "NULL");
+ trace_mark(rcupreempt_trace_move2done, MARK_NOARGS);
}
for (i = GP_STAGES - 2; i >= 0; i--) {
if (rdp->waitlist[i] != NULL) {
@@ -327,7 +327,7 @@ static void __rcu_advance_callbacks(stru
wlc++;
rdp->nextlist = NULL;
rdp->nexttail = &rdp->nextlist;
- trace_mark(rcupreempt_trace_move2wait, "NULL");
+ trace_mark(rcupreempt_trace_move2wait, MARK_NOARGS);
} else {
rdp->waitlist[0] = NULL;
rdp->waittail[0] = &rdp->waitlist[0];
@@ -571,9 +571,9 @@ rcu_try_flip_idle(void)
{
int cpu;

- trace_mark(rcupreempt_trace_try_flip_i1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_i1, MARK_NOARGS);
if (!rcu_pending(smp_processor_id())) {
- trace_mark(rcupreempt_trace_try_flip_ie1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_ie1, MARK_NOARGS);
return 0;
}

@@ -581,7 +581,7 @@ rcu_try_flip_idle(void)
* Do the flip.
*/

- trace_mark(rcupreempt_trace_try_flip_g1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_g1, MARK_NOARGS);
rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */

/*
@@ -611,11 +611,11 @@ rcu_try_flip_waitack(void)
{
int cpu;

- trace_mark(rcupreempt_trace_try_flip_a1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_a1, MARK_NOARGS);
for_each_cpu_mask(cpu, rcu_cpu_online_map)
if (rcu_try_flip_waitack_needed(cpu) &&
per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
- trace_mark(rcupreempt_trace_try_flip_ae1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_ae1, MARK_NOARGS);
return 0;
}

@@ -625,7 +625,7 @@ rcu_try_flip_waitack(void)
*/

smp_mb(); /* see above block comment. */
- trace_mark(rcupreempt_trace_try_flip_a2, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_a2, MARK_NOARGS);
return 1;
}

@@ -643,11 +643,11 @@ rcu_try_flip_waitzero(void)

/* Check to see if the sum of the "last" counters is zero. */

- trace_mark(rcupreempt_trace_try_flip_z1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_z1, MARK_NOARGS);
for_each_possible_cpu(cpu)
sum += per_cpu(rcu_flipctr, cpu)[lastidx];
if (sum != 0) {
- trace_mark(rcupreempt_trace_try_flip_ze1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_ze1, MARK_NOARGS);
return 0;
}

@@ -660,7 +660,7 @@ rcu_try_flip_waitzero(void)
dyntick_save_progress_counter(cpu);
}

- trace_mark(rcupreempt_trace_try_flip_z2, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_z2, MARK_NOARGS);
return 1;
}

@@ -674,16 +674,16 @@ rcu_try_flip_waitmb(void)
{
int cpu;

- trace_mark(rcupreempt_trace_try_flip_m1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_m1, MARK_NOARGS);
for_each_cpu_mask(cpu, rcu_cpu_online_map)
if (rcu_try_flip_waitmb_needed(cpu) &&
per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
- trace_mark(rcupreempt_trace_try_flip_me1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_me1, MARK_NOARGS);
return 0;
}

smp_mb(); /* Ensure that the above checks precede any following flip. */
- trace_mark(rcupreempt_trace_try_flip_m2, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_m2, MARK_NOARGS);
return 1;
}

@@ -700,9 +700,9 @@ static void rcu_try_flip(void)
{
unsigned long oldirq;

- trace_mark(rcupreempt_trace_try_flip_1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_1, MARK_NOARGS);
if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, oldirq))) {
- trace_mark(rcupreempt_trace_try_flip_e1, "NULL");
+ trace_mark(rcupreempt_trace_try_flip_e1, MARK_NOARGS);
return;
}

@@ -754,7 +754,7 @@ void rcu_check_callbacks_rt(int cpu, int
if (rcu_ctrlblk.completed == rdp->completed)
rcu_try_flip();
spin_lock_irqsave(&rdp->lock, oldirq);
- trace_mark(rcupreempt_trace_check_callbacks, "NULL");
+ trace_mark(rcupreempt_trace_check_callbacks, MARK_NOARGS);
__rcu_advance_callbacks(rdp);
spin_unlock_irqrestore(&rdp->lock, oldirq);
}
@@ -774,7 +774,7 @@ void rcu_advance_callbacks_rt(int cpu, i
return;
}
spin_lock_irqsave(&rdp->lock, oldirq);
- trace_mark(rcupreempt_trace_check_callbacks, "NULL");
+ trace_mark(rcupreempt_trace_check_callbacks, MARK_NOARGS);
__rcu_advance_callbacks(rdp);
spin_unlock_irqrestore(&rdp->lock, oldirq);
}
@@ -876,13 +876,13 @@ void rcu_process_callbacks_rt(struct sof
}
rdp->donelist = NULL;
rdp->donetail = &rdp->donelist;
- trace_mark(rcupreempt_trace_done_remove, "NULL");
+ trace_mark(rcupreempt_trace_done_remove, MARK_NOARGS);
spin_unlock_irqrestore(&rdp->lock, flags);
while (list) {
next = list->next;
list->func(list);
list = next;
- trace_mark(rcupreempt_trace_invoke, "NULL");
+ trace_mark(rcupreempt_trace_invoke, MARK_NOARGS);
}
}

@@ -900,7 +900,7 @@ void fastcall call_rcu_preempt(struct rc
__rcu_advance_callbacks(rdp);
*rdp->nexttail = head;
rdp->nexttail = &head->next;
- trace_mark(rcupreempt_trace_next_add, "NULL");
+ trace_mark(rcupreempt_trace_next_add, MARK_NOARGS);
spin_unlock(&rdp->lock);
local_irq_restore(oldirq);
}

2008-02-18 12:48:38

by Jan Kiszka

[permalink] [raw]
Subject: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II

K. Prasad wrote:
> Hi Ingo,
> Please accept these patches into the rt tree which convert the
> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> markers.
>
> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
>
> Along with marker transition, the RCU Tracing infrastructure has also
> been modularised to be built as a kernel module, thereby enabling
> runtime changes to the RCU Tracing infrastructure.
>
> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> rcupreempt.c into markers.
>
> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> rcupreempt-boost.c into markers.
>

I have a technical problem with marker-based RCU tracing: It causes
nasty recursions with latest multi-probe marker patches (sorry, no link
at hand, can be found in latest LTTng, maybe also already in -mm). Those
patches introduce a marker probe trampoline like this:

void marker_probe_cb(const struct marker *mdata, void *call_private,
const char *fmt, ...)
{
va_list args;
char ptype;

/*
* rcu_read_lock does two things : disabling preemption to make sure the
* teardown of the callbacks can be done correctly when they are in
* modules and they insure RCU read coherency.
*/
rcu_read_lock();
preempt_disable();
...

Can we do multi-probe with pure preempt_disable/enable protection? I
guess it's fine with classic RCU, but what about preemptible RCU? Any
suggestion appreciated!

Jan

PS: You will run into this issue if you try to marry latest -rt with
latest LTTng. Straightforward workaround is to comment-out any RCU
trace_mark occurrences.

--
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

2008-02-18 19:48:39

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II

On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> K. Prasad wrote:
> > Hi Ingo,
> > Please accept these patches into the rt tree which convert the
> > existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> > markers.
> >
> > These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> >
> > Along with marker transition, the RCU Tracing infrastructure has also
> > been modularised to be built as a kernel module, thereby enabling
> > runtime changes to the RCU Tracing infrastructure.
> >
> > Patch [1/2] - Patch that converts the Preempt RCU tracing in
> > rcupreempt.c into markers.
> >
> > Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> > rcupreempt-boost.c into markers.
> >
>
> I have a technical problem with marker-based RCU tracing: It causes
> nasty recursions with latest multi-probe marker patches (sorry, no link
> at hand, can be found in latest LTTng, maybe also already in -mm). Those
> patches introduce a marker probe trampoline like this:
>
> void marker_probe_cb(const struct marker *mdata, void *call_private,
> const char *fmt, ...)
> {
> va_list args;
> char ptype;
>
> /*
> * rcu_read_lock does two things : disabling preemption to make sure the
> * teardown of the callbacks can be done correctly when they are in
> * modules and they insure RCU read coherency.
> */
> rcu_read_lock();
> preempt_disable();
> ...
>
> Can we do multi-probe with pure preempt_disable/enable protection? I
> guess it's fine with classic RCU, but what about preemptible RCU? Any
> suggestion appreciated!

If you substitute synchronize_sched() for synchronize_rcu(), this should
work fine. Of course, this approach would cause RCU tracing to degrade
latencies somewhat in -rt.

If tracing is using call_rcu(), we will need to add a call_sched()
or some such.

Thanx, Paul

> Jan
>
> PS: You will run into this issue if you try to marry latest -rt with
> latest LTTng. Straightforward workaround is to comment-out any RCU
> trace_mark occurrences.
>
> --
> Siemens AG, Corporate Technology, CT SE 2
> Corporate Competence Center Embedded Linux

2008-02-18 20:41:28

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II

* Paul E. McKenney ([email protected]) wrote:
> On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> > K. Prasad wrote:
> > > Hi Ingo,
> > > Please accept these patches into the rt tree which convert the
> > > existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> > > markers.
> > >
> > > These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> > >
> > > Along with marker transition, the RCU Tracing infrastructure has also
> > > been modularised to be built as a kernel module, thereby enabling
> > > runtime changes to the RCU Tracing infrastructure.
> > >
> > > Patch [1/2] - Patch that converts the Preempt RCU tracing in
> > > rcupreempt.c into markers.
> > >
> > > Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> > > rcupreempt-boost.c into markers.
> > >
> >
> > I have a technical problem with marker-based RCU tracing: It causes
> > nasty recursions with latest multi-probe marker patches (sorry, no link
> > at hand, can be found in latest LTTng, maybe also already in -mm). Those
> > patches introduce a marker probe trampoline like this:
> >
> > void marker_probe_cb(const struct marker *mdata, void *call_private,
> > const char *fmt, ...)
> > {
> > va_list args;
> > char ptype;
> >
> > /*
> > * rcu_read_lock does two things : disabling preemption to make sure the
> > * teardown of the callbacks can be done correctly when they are in
> > * modules and they insure RCU read coherency.
> > */
> > rcu_read_lock();
> > preempt_disable();
> > ...
> >
> > Can we do multi-probe with pure preempt_disable/enable protection? I
> > guess it's fine with classic RCU, but what about preemptible RCU? Any
> > suggestion appreciated!
>
> If you substitute synchronize_sched() for synchronize_rcu(), this should
> work fine. Of course, this approach would cause RCU tracing to degrade
> latencies somewhat in -rt.
>
> If tracing is using call_rcu(), we will need to add a call_sched()
> or some such.
>

Yes, I use call_rcu, so I guess a call_sched would be useful here.

Mathieu

> Thanx, Paul
>
> > Jan
> >
> > PS: You will run into this issue if you try to marry latest -rt with
> > latest LTTng. Straightforward workaround is to comment-out any RCU
> > trace_mark occurrences.
> >
> > --
> > Siemens AG, Corporate Technology, CT SE 2
> > Corporate Competence Center Embedded Linux

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-02-19 16:28:46

by Jan Kiszka

[permalink] [raw]
Subject: Markers: multi-probe locking fun (was: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II)

Paul E. McKenney wrote:
> On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
>> K. Prasad wrote:
>>> Hi Ingo,
>>> Please accept these patches into the rt tree which convert the
>>> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
>>> markers.
>>>
>>> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
>>>
>>> Along with marker transition, the RCU Tracing infrastructure has also
>>> been modularised to be built as a kernel module, thereby enabling
>>> runtime changes to the RCU Tracing infrastructure.
>>>
>>> Patch [1/2] - Patch that converts the Preempt RCU tracing in
>>> rcupreempt.c into markers.
>>>
>>> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
>>> rcupreempt-boost.c into markers.
>>>
>> I have a technical problem with marker-based RCU tracing: It causes
>> nasty recursions with latest multi-probe marker patches (sorry, no link
>> at hand, can be found in latest LTTng, maybe also already in -mm). Those
>> patches introduce a marker probe trampoline like this:
>>
>> void marker_probe_cb(const struct marker *mdata, void *call_private,
>> const char *fmt, ...)
>> {
>> va_list args;
>> char ptype;
>>
>> /*
>> * rcu_read_lock does two things : disabling preemption to make sure the
>> * teardown of the callbacks can be done correctly when they are in
>> * modules and they insure RCU read coherency.
>> */
>> rcu_read_lock();
>> preempt_disable();
>> ...
>>
>> Can we do multi-probe with pure preempt_disable/enable protection? I
>> guess it's fine with classic RCU, but what about preemptible RCU? Any
>> suggestion appreciated!
>
> If you substitute synchronize_sched() for synchronize_rcu(), this should
> work fine. Of course, this approach would cause RCU tracing to degrade
> latencies somewhat in -rt.
>
> If tracing is using call_rcu(), we will need to add a call_sched()
> or some such.

You mean something like "#define call_sched call_rcu_classic"?

I just learned that there is another reason for killing
rcu_read_lock&friends from the marker probes: It can deadlock on -rt
with PREEMPT_RCU_BOOST (hit probe inside rq-lock protected region =>
rcu_read_unlock triggers unboost => stuck on rq_lock :( ).

Jan

--
Siemens AG, Corporate Technology, CT SE 2
Corporate Competence Center Embedded Linux

2008-02-19 20:33:45

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: Markers: multi-probe locking fun (was: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II)

* Jan Kiszka ([email protected]) wrote:
> Paul E. McKenney wrote:
> > On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> >> K. Prasad wrote:
> >>> Hi Ingo,
> >>> Please accept these patches into the rt tree which convert the
> >>> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> >>> markers.
> >>>
> >>> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> >>>
> >>> Along with marker transition, the RCU Tracing infrastructure has also
> >>> been modularised to be built as a kernel module, thereby enabling
> >>> runtime changes to the RCU Tracing infrastructure.
> >>>
> >>> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> >>> rcupreempt.c into markers.
> >>>
> >>> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> >>> rcupreempt-boost.c into markers.
> >>>
> >> I have a technical problem with marker-based RCU tracing: It causes
> >> nasty recursions with latest multi-probe marker patches (sorry, no link
> >> at hand, can be found in latest LTTng, maybe also already in -mm). Those
> >> patches introduce a marker probe trampoline like this:
> >>
> >> void marker_probe_cb(const struct marker *mdata, void *call_private,
> >> const char *fmt, ...)
> >> {
> >> va_list args;
> >> char ptype;
> >>
> >> /*
> >> * rcu_read_lock does two things : disabling preemption to make sure the
> >> * teardown of the callbacks can be done correctly when they are in
> >> * modules and they insure RCU read coherency.
> >> */
> >> rcu_read_lock();
> >> preempt_disable();
> >> ...
> >>
> >> Can we do multi-probe with pure preempt_disable/enable protection? I
> >> guess it's fine with classic RCU, but what about preemptible RCU? Any
> >> suggestion appreciated!
> >
> > If you substitute synchronize_sched() for synchronize_rcu(), this should
> > work fine. Of course, this approach would cause RCU tracing to degrade
> > latencies somewhat in -rt.
> >
> > If tracing is using call_rcu(), we will need to add a call_sched()
> > or some such.
>
> You mean something like "#define call_sched call_rcu_classic"?
>
> I just learned that there is another reason for killing
> rcu_read_lock&friends from the marker probes: It can deadlock on -rt
> with PREEMPT_RCU_BOOST (hit probe inside rq-lock protected region =>
> rcu_read_unlock triggers unboost => stuck on rq_lock :( ).
>

Yep, ok, let's do this :

in include/linux/rcupdate.h

#ifndef PREEMPT_RT
#define call_sched call_rcu
#else
#define call_sched call_rcu_classic
#endif

And I'll adapt the markers accordingly.

Mathieu

> Jan
>
> --
> Siemens AG, Corporate Technology, CT SE 2
> Corporate Competence Center Embedded Linux

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-02-19 21:54:49

by Paul E. McKenney

[permalink] [raw]
Subject: Re: Markers: multi-probe locking fun (was: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II)

On Tue, Feb 19, 2008 at 05:27:40PM +0100, Jan Kiszka wrote:
> Paul E. McKenney wrote:
> > On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> >> K. Prasad wrote:
> >>> Hi Ingo,
> >>> Please accept these patches into the rt tree which convert the
> >>> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> >>> markers.
> >>>
> >>> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> >>>
> >>> Along with marker transition, the RCU Tracing infrastructure has also
> >>> been modularised to be built as a kernel module, thereby enabling
> >>> runtime changes to the RCU Tracing infrastructure.
> >>>
> >>> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> >>> rcupreempt.c into markers.
> >>>
> >>> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> >>> rcupreempt-boost.c into markers.
> >>>
> >> I have a technical problem with marker-based RCU tracing: It causes
> >> nasty recursions with latest multi-probe marker patches (sorry, no link
> >> at hand, can be found in latest LTTng, maybe also already in -mm). Those
> >> patches introduce a marker probe trampoline like this:
> >>
> >> void marker_probe_cb(const struct marker *mdata, void *call_private,
> >> const char *fmt, ...)
> >> {
> >> va_list args;
> >> char ptype;
> >>
> >> /*
> >> * rcu_read_lock does two things : disabling preemption to make sure the
> >> * teardown of the callbacks can be done correctly when they are in
> >> * modules and they insure RCU read coherency.
> >> */
> >> rcu_read_lock();
> >> preempt_disable();
> >> ...
> >>
> >> Can we do multi-probe with pure preempt_disable/enable protection? I
> >> guess it's fine with classic RCU, but what about preemptible RCU? Any
> >> suggestion appreciated!
> >
> > If you substitute synchronize_sched() for synchronize_rcu(), this should
> > work fine. Of course, this approach would cause RCU tracing to degrade
> > latencies somewhat in -rt.
> >
> > If tracing is using call_rcu(), we will need to add a call_sched()
> > or some such.
>
> You mean something like "#define call_sched call_rcu_classic"?

This would work for Classic RCU. For preemptible RCU, a bit more
work is needed.

> I just learned that there is another reason for killing
> rcu_read_lock&friends from the marker probes: It can deadlock on -rt
> with PREEMPT_RCU_BOOST (hit probe inside rq-lock protected region =>
> rcu_read_unlock triggers unboost => stuck on rq_lock :( ).

OK, good to know. Guess we need a call_sched() for -rt and for
preemptible RCU sooner rather than later...

Thanx, Paul

2008-02-19 22:03:35

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: Markers: multi-probe locking fun (was: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II)

* Paul E. McKenney ([email protected]) wrote:
> On Tue, Feb 19, 2008 at 05:27:40PM +0100, Jan Kiszka wrote:
> > Paul E. McKenney wrote:
> > > On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> > >> K. Prasad wrote:
> > >>> Hi Ingo,
> > >>> Please accept these patches into the rt tree which convert the
> > >>> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> > >>> markers.
> > >>>
> > >>> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> > >>>
> > >>> Along with marker transition, the RCU Tracing infrastructure has also
> > >>> been modularised to be built as a kernel module, thereby enabling
> > >>> runtime changes to the RCU Tracing infrastructure.
> > >>>
> > >>> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> > >>> rcupreempt.c into markers.
> > >>>
> > >>> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> > >>> rcupreempt-boost.c into markers.
> > >>>
> > >> I have a technical problem with marker-based RCU tracing: It causes
> > >> nasty recursions with latest multi-probe marker patches (sorry, no link
> > >> at hand, can be found in latest LTTng, maybe also already in -mm). Those
> > >> patches introduce a marker probe trampoline like this:
> > >>
> > >> void marker_probe_cb(const struct marker *mdata, void *call_private,
> > >> const char *fmt, ...)
> > >> {
> > >> va_list args;
> > >> char ptype;
> > >>
> > >> /*
> > >> * rcu_read_lock does two things : disabling preemption to make sure the
> > >> * teardown of the callbacks can be done correctly when they are in
> > >> * modules and they insure RCU read coherency.
> > >> */
> > >> rcu_read_lock();
> > >> preempt_disable();
> > >> ...
> > >>
> > >> Can we do multi-probe with pure preempt_disable/enable protection? I
> > >> guess it's fine with classic RCU, but what about preemptible RCU? Any
> > >> suggestion appreciated!
> > >
> > > If you substitute synchronize_sched() for synchronize_rcu(), this should
> > > work fine. Of course, this approach would cause RCU tracing to degrade
> > > latencies somewhat in -rt.
> > >
> > > If tracing is using call_rcu(), we will need to add a call_sched()
> > > or some such.
> >
> > You mean something like "#define call_sched call_rcu_classic"?
>
> This would work for Classic RCU. For preemptible RCU, a bit more
> work is needed.
>
> > I just learned that there is another reason for killing
> > rcu_read_lock&friends from the marker probes: It can deadlock on -rt
> > with PREEMPT_RCU_BOOST (hit probe inside rq-lock protected region =>
> > rcu_read_unlock triggers unboost => stuck on rq_lock :( ).
>
> OK, good to know. Guess we need a call_sched() for -rt and for
> preemptible RCU sooner rather than later...
>

Yup, I would need call_sched() and sched_barrier().

Thanks,

Mathieu

> Thanx, Paul

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-02-19 22:18:32

by Paul E. McKenney

[permalink] [raw]
Subject: Re: Markers: multi-probe locking fun (was: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II)

On Tue, Feb 19, 2008 at 03:33:26PM -0500, Mathieu Desnoyers wrote:
> * Jan Kiszka ([email protected]) wrote:
> > Paul E. McKenney wrote:
> > > On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> > >> K. Prasad wrote:
> > >>> Hi Ingo,
> > >>> Please accept these patches into the rt tree which convert the
> > >>> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> > >>> markers.
> > >>>
> > >>> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> > >>>
> > >>> Along with marker transition, the RCU Tracing infrastructure has also
> > >>> been modularised to be built as a kernel module, thereby enabling
> > >>> runtime changes to the RCU Tracing infrastructure.
> > >>>
> > >>> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> > >>> rcupreempt.c into markers.
> > >>>
> > >>> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> > >>> rcupreempt-boost.c into markers.
> > >>>
> > >> I have a technical problem with marker-based RCU tracing: It causes
> > >> nasty recursions with latest multi-probe marker patches (sorry, no link
> > >> at hand, can be found in latest LTTng, maybe also already in -mm). Those
> > >> patches introduce a marker probe trampoline like this:
> > >>
> > >> void marker_probe_cb(const struct marker *mdata, void *call_private,
> > >> const char *fmt, ...)
> > >> {
> > >> va_list args;
> > >> char ptype;
> > >>
> > >> /*
> > >> * rcu_read_lock does two things : disabling preemption to make sure the
> > >> * teardown of the callbacks can be done correctly when they are in
> > >> * modules and they insure RCU read coherency.
> > >> */
> > >> rcu_read_lock();
> > >> preempt_disable();
> > >> ...
> > >>
> > >> Can we do multi-probe with pure preempt_disable/enable protection? I
> > >> guess it's fine with classic RCU, but what about preemptible RCU? Any
> > >> suggestion appreciated!
> > >
> > > If you substitute synchronize_sched() for synchronize_rcu(), this should
> > > work fine. Of course, this approach would cause RCU tracing to degrade
> > > latencies somewhat in -rt.
> > >
> > > If tracing is using call_rcu(), we will need to add a call_sched()
> > > or some such.
> >
> > You mean something like "#define call_sched call_rcu_classic"?
> >
> > I just learned that there is another reason for killing
> > rcu_read_lock&friends from the marker probes: It can deadlock on -rt
> > with PREEMPT_RCU_BOOST (hit probe inside rq-lock protected region =>
> > rcu_read_unlock triggers unboost => stuck on rq_lock :( ).
> >
>
> Yep, ok, let's do this :
>
> in include/linux/rcupdate.h
>
> #ifndef PREEMPT_RT
> #define call_sched call_rcu
> #else
> #define call_sched call_rcu_classic
> #endif
>
> And I'll adapt the markers accordingly.

Good point, this will indeed work for 2.6.24-rt1!

Will need to do a bit more for 2.6.25-rc1. My current thought is to
provide a kernel thread that loops over the CPUs, advancing/invoking
per-CPU lists of callbacks as it does so. Then call_sched() would simply
enqueue its callback on the current CPU's next list.

Thanx, Paul

2008-02-19 22:19:34

by Paul E. McKenney

[permalink] [raw]
Subject: Re: Markers: multi-probe locking fun (was: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II)

On Tue, Feb 19, 2008 at 05:03:18PM -0500, Mathieu Desnoyers wrote:
> * Paul E. McKenney ([email protected]) wrote:
> > On Tue, Feb 19, 2008 at 05:27:40PM +0100, Jan Kiszka wrote:
> > > Paul E. McKenney wrote:
> > > > On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> > > >> K. Prasad wrote:
> > > >>> Hi Ingo,
> > > >>> Please accept these patches into the rt tree which convert the
> > > >>> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> > > >>> markers.
> > > >>>
> > > >>> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> > > >>>
> > > >>> Along with marker transition, the RCU Tracing infrastructure has also
> > > >>> been modularised to be built as a kernel module, thereby enabling
> > > >>> runtime changes to the RCU Tracing infrastructure.
> > > >>>
> > > >>> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> > > >>> rcupreempt.c into markers.
> > > >>>
> > > >>> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> > > >>> rcupreempt-boost.c into markers.
> > > >>>
> > > >> I have a technical problem with marker-based RCU tracing: It causes
> > > >> nasty recursions with latest multi-probe marker patches (sorry, no link
> > > >> at hand, can be found in latest LTTng, maybe also already in -mm). Those
> > > >> patches introduce a marker probe trampoline like this:
> > > >>
> > > >> void marker_probe_cb(const struct marker *mdata, void *call_private,
> > > >> const char *fmt, ...)
> > > >> {
> > > >> va_list args;
> > > >> char ptype;
> > > >>
> > > >> /*
> > > >> * rcu_read_lock does two things : disabling preemption to make sure the
> > > >> * teardown of the callbacks can be done correctly when they are in
> > > >> * modules and they insure RCU read coherency.
> > > >> */
> > > >> rcu_read_lock();
> > > >> preempt_disable();
> > > >> ...
> > > >>
> > > >> Can we do multi-probe with pure preempt_disable/enable protection? I
> > > >> guess it's fine with classic RCU, but what about preemptible RCU? Any
> > > >> suggestion appreciated!
> > > >
> > > > If you substitute synchronize_sched() for synchronize_rcu(), this should
> > > > work fine. Of course, this approach would cause RCU tracing to degrade
> > > > latencies somewhat in -rt.
> > > >
> > > > If tracing is using call_rcu(), we will need to add a call_sched()
> > > > or some such.
> > >
> > > You mean something like "#define call_sched call_rcu_classic"?
> >
> > This would work for Classic RCU. For preemptible RCU, a bit more
> > work is needed.
> >
> > > I just learned that there is another reason for killing
> > > rcu_read_lock&friends from the marker probes: It can deadlock on -rt
> > > with PREEMPT_RCU_BOOST (hit probe inside rq-lock protected region =>
> > > rcu_read_unlock triggers unboost => stuck on rq_lock :( ).
> >
> > OK, good to know. Guess we need a call_sched() for -rt and for
> > preemptible RCU sooner rather than later...
> >
>
> Yup, I would need call_sched() and sched_barrier().

Ah, good point, kprobes does indeed use modules!

Thanx, Paul

2008-02-19 22:32:45

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: Markers: multi-probe locking fun (was: Re: [PATCH 0/2] Markers Implementation for RCU Tracing - Ver II)

* Paul E. McKenney ([email protected]) wrote:
> On Tue, Feb 19, 2008 at 03:33:26PM -0500, Mathieu Desnoyers wrote:
> > * Jan Kiszka ([email protected]) wrote:
> > > Paul E. McKenney wrote:
> > > > On Mon, Feb 18, 2008 at 01:47:31PM +0100, Jan Kiszka wrote:
> > > >> K. Prasad wrote:
> > > >>> Hi Ingo,
> > > >>> Please accept these patches into the rt tree which convert the
> > > >>> existing RCU tracing mechanism for Preempt RCU and RCU Boost into
> > > >>> markers.
> > > >>>
> > > >>> These patches are based upon the 2.6.24-rc5-rt1 kernel tree.
> > > >>>
> > > >>> Along with marker transition, the RCU Tracing infrastructure has also
> > > >>> been modularised to be built as a kernel module, thereby enabling
> > > >>> runtime changes to the RCU Tracing infrastructure.
> > > >>>
> > > >>> Patch [1/2] - Patch that converts the Preempt RCU tracing in
> > > >>> rcupreempt.c into markers.
> > > >>>
> > > >>> Patch [1/2] - Patch that converts the Preempt RCU Boost tracing in
> > > >>> rcupreempt-boost.c into markers.
> > > >>>
> > > >> I have a technical problem with marker-based RCU tracing: It causes
> > > >> nasty recursions with latest multi-probe marker patches (sorry, no link
> > > >> at hand, can be found in latest LTTng, maybe also already in -mm). Those
> > > >> patches introduce a marker probe trampoline like this:
> > > >>
> > > >> void marker_probe_cb(const struct marker *mdata, void *call_private,
> > > >> const char *fmt, ...)
> > > >> {
> > > >> va_list args;
> > > >> char ptype;
> > > >>
> > > >> /*
> > > >> * rcu_read_lock does two things : disabling preemption to make sure the
> > > >> * teardown of the callbacks can be done correctly when they are in
> > > >> * modules and they insure RCU read coherency.
> > > >> */
> > > >> rcu_read_lock();
> > > >> preempt_disable();
> > > >> ...
> > > >>
> > > >> Can we do multi-probe with pure preempt_disable/enable protection? I
> > > >> guess it's fine with classic RCU, but what about preemptible RCU? Any
> > > >> suggestion appreciated!
> > > >
> > > > If you substitute synchronize_sched() for synchronize_rcu(), this should
> > > > work fine. Of course, this approach would cause RCU tracing to degrade
> > > > latencies somewhat in -rt.
> > > >
> > > > If tracing is using call_rcu(), we will need to add a call_sched()
> > > > or some such.
> > >
> > > You mean something like "#define call_sched call_rcu_classic"?
> > >
> > > I just learned that there is another reason for killing
> > > rcu_read_lock&friends from the marker probes: It can deadlock on -rt
> > > with PREEMPT_RCU_BOOST (hit probe inside rq-lock protected region =>
> > > rcu_read_unlock triggers unboost => stuck on rq_lock :( ).
> > >
> >
> > Yep, ok, let's do this :
> >
> > in include/linux/rcupdate.h
> >
> > #ifndef PREEMPT_RT
> > #define call_sched call_rcu
> > #else
> > #define call_sched call_rcu_classic
> > #endif
> >
> > And I'll adapt the markers accordingly.
>
> Good point, this will indeed work for 2.6.24-rt1!
>
> Will need to do a bit more for 2.6.25-rc1. My current thought is to
> provide a kernel thread that loops over the CPUs, advancing/invoking
> per-CPU lists of callbacks as it does so. Then call_sched() would simply
> enqueue its callback on the current CPU's next list.
>

I just noticed that my multiple probes support patch, that just got into
mainline, does not include the fixes I did you asked for (which includes
protecting with rcu_read_lock). I guess Andrew did not notice the patch.
Therefore, I think we'll need to address this pretty soon to make the
markers play nicely with CONFIG_PREEMPT_RCU.

Mathieu

> Thanx, Paul

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68