2020-06-25 05:36:29

by Nicholas Piggin

[permalink] [raw]
Subject: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU

On a 144 thread system, `perf ftrace` takes about 20 seconds to start
up, due to calling synchronize_rcu() for each CPU.

cat /proc/108560/stack
0xc0003e7eb336f470
__switch_to+0x2e0/0x480
__wait_rcu_gp+0x20c/0x220
synchronize_rcu+0x9c/0xc0
ring_buffer_reset_cpu+0x88/0x2e0
tracing_reset_online_cpus+0x84/0xe0
tracing_open+0x1d4/0x1f0

On a system with 10x more threads, it starts to become an annoyance.

Batch these up so we disable all the per-cpu buffers first, then
synchronize_rcu() once, then reset each of the buffers. This brings
the time down to about 0.5s.

Cc: Paul McKenney <[email protected]>
Cc: Anton Blanchard <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: [email protected]
Signed-off-by: Nicholas Piggin <[email protected]>
---
include/linux/ring_buffer.h | 1 +
kernel/trace/ring_buffer.c | 85 +++++++++++++++++++++++++++++++------
kernel/trace/trace.c | 4 +-
3 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index c76b2f3b3ac4..136ea0997e6d 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -143,6 +143,7 @@ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);

void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
+void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
void ring_buffer_reset(struct trace_buffer *buffer);

#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b8e1ca48be50..3f1fd02bd14a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -270,6 +270,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
#define for_each_buffer_cpu(buffer, cpu) \
for_each_cpu(cpu, buffer->cpumask)

+#define for_each_online_buffer_cpu(buffer, cpu) \
+ for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
+
#define TS_SHIFT 27
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
#define TS_DELTA_TEST (~TS_MASK)
@@ -4484,6 +4487,26 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
rb_head_page_activate(cpu_buffer);
}

+/* Must have disabled the cpu buffer then done a synchronize_rcu */
+static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+
+ if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
+ goto out;
+
+ arch_spin_lock(&cpu_buffer->lock);
+
+ rb_reset_cpu(cpu_buffer);
+
+ arch_spin_unlock(&cpu_buffer->lock);
+
+ out:
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
+
/**
* ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
* @buffer: The ring buffer to reset a per cpu buffer of
@@ -4492,7 +4515,6 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
- unsigned long flags;

if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
@@ -4503,24 +4525,42 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
/* Make sure all commits have finished */
synchronize_rcu();

- raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ reset_disabled_cpu_buffer(cpu_buffer);

- if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
- goto out;
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->resize_disabled);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);

- arch_spin_lock(&cpu_buffer->lock);
+/**
+ * ring_buffer_reset_online_cpus - reset the per CPU buffers of online CPUs
+ * @buffer: The ring buffer whose per CPU buffers are reset, for each CPU
+ *          that is set in both buffer->cpumask and cpu_online_mask
+ */
+void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ int cpu;

- rb_reset_cpu(cpu_buffer);
+ for_each_online_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];

- arch_spin_unlock(&cpu_buffer->lock);
+ atomic_inc(&cpu_buffer->resize_disabled);
+ atomic_inc(&cpu_buffer->record_disabled);
+ }

- out:
- raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+ /* Make sure all commits have finished */
+ synchronize_rcu();

- atomic_dec(&cpu_buffer->record_disabled);
- atomic_dec(&cpu_buffer->resize_disabled);
+ for_each_online_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
+ reset_disabled_cpu_buffer(cpu_buffer);
+
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->resize_disabled);
+ }
}
-EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);

/**
* ring_buffer_reset - reset a ring buffer
@@ -4528,10 +4568,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
*/
void ring_buffer_reset(struct trace_buffer *buffer)
{
+ struct ring_buffer_per_cpu *cpu_buffer;
int cpu;

- for_each_buffer_cpu(buffer, cpu)
- ring_buffer_reset_cpu(buffer, cpu);
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
+ atomic_inc(&cpu_buffer->resize_disabled);
+ atomic_inc(&cpu_buffer->record_disabled);
+ }
+
+ /* Make sure all commits have finished */
+ synchronize_rcu();
+
+ for_each_buffer_cpu(buffer, cpu) {
+ cpu_buffer = buffer->buffers[cpu];
+
+ reset_disabled_cpu_buffer(cpu_buffer);
+
+ atomic_dec(&cpu_buffer->record_disabled);
+ atomic_dec(&cpu_buffer->resize_disabled);
+ }
}
EXPORT_SYMBOL_GPL(ring_buffer_reset);

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ec44b0e2a19c..9a26a1c875ae 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2003,7 +2003,6 @@ static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
void tracing_reset_online_cpus(struct array_buffer *buf)
{
struct trace_buffer *buffer = buf->buffer;
- int cpu;

if (!buffer)
return;
@@ -2015,8 +2014,7 @@ void tracing_reset_online_cpus(struct array_buffer *buf)

buf->time_start = buffer_ftrace_now(buf, buf->cpu);

- for_each_online_cpu(cpu)
- ring_buffer_reset_cpu(buffer, cpu);
+ ring_buffer_reset_online_cpus(buffer);

ring_buffer_record_enable(buffer);
}
--
2.23.0


2020-06-25 23:43:53

by Anton Blanchard

[permalink] [raw]
Subject: Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU

Hi Nick,

> On a 144 thread system, `perf ftrace` takes about 20 seconds to start
> up, due to calling synchronize_rcu() for each CPU.
>
> cat /proc/108560/stack
> 0xc0003e7eb336f470
> __switch_to+0x2e0/0x480
> __wait_rcu_gp+0x20c/0x220
> synchronize_rcu+0x9c/0xc0
> ring_buffer_reset_cpu+0x88/0x2e0
> tracing_reset_online_cpus+0x84/0xe0
> tracing_open+0x1d4/0x1f0
>
> On a system with 10x more threads, it starts to become an annoyance.
>
> Batch these up so we disable all the per-cpu buffers first, then
> synchronize_rcu() once, then reset each of the buffers. This brings
> the time down to about 0.5s.

It's gone from somewhere more than 10 minutes (I gave up waiting) to
3 seconds. Nice work!

Tested-by: Anton Blanchard <[email protected]>

Thanks,
Anton

> Cc: Paul McKenney <[email protected]>
> Cc: Anton Blanchard <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: [email protected]
> Signed-off-by: Nicholas Piggin <[email protected]>
> ---
> include/linux/ring_buffer.h | 1 +
> kernel/trace/ring_buffer.c | 85 +++++++++++++++++++++++++++++++------
> kernel/trace/trace.c | 4 +-
> 3 files changed, 73 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
> index c76b2f3b3ac4..136ea0997e6d 100644
> --- a/include/linux/ring_buffer.h
> +++ b/include/linux/ring_buffer.h
> @@ -143,6 +143,7 @@ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
> unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);
>
> void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
> void ring_buffer_reset(struct trace_buffer *buffer);
>
> #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index b8e1ca48be50..3f1fd02bd14a 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -270,6 +270,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
> #define for_each_buffer_cpu(buffer, cpu) \
> for_each_cpu(cpu, buffer->cpumask)
>
> +#define for_each_online_buffer_cpu(buffer, cpu) \
> + for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
> +
> #define TS_SHIFT 27
> #define TS_MASK ((1ULL << TS_SHIFT) - 1)
> #define TS_DELTA_TEST (~TS_MASK)
> @@ -4484,6 +4487,26 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
> rb_head_page_activate(cpu_buffer);
> }
>
> +/* Must have disabled the cpu buffer then done a synchronize_rcu */
> +static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
> +{
> + unsigned long flags;
> +
> + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> +
> + if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
> + goto out;
> +
> + arch_spin_lock(&cpu_buffer->lock);
> +
> + rb_reset_cpu(cpu_buffer);
> +
> + arch_spin_unlock(&cpu_buffer->lock);
> +
> + out:
> + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> +}
> +
> /**
> * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
> * @buffer: The ring buffer to reset a per cpu buffer of
> @@ -4492,7 +4515,6 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
> void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
> {
> struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
> - unsigned long flags;
>
> if (!cpumask_test_cpu(cpu, buffer->cpumask))
> return;
> @@ -4503,24 +4525,42 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
> /* Make sure all commits have finished */
> synchronize_rcu();
>
> - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> + reset_disabled_cpu_buffer(cpu_buffer);
>
> - if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
> - goto out;
> + atomic_dec(&cpu_buffer->record_disabled);
> + atomic_dec(&cpu_buffer->resize_disabled);
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>
> - arch_spin_lock(&cpu_buffer->lock);
> +/**
> + * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
> + * @buffer: The ring buffer to reset a per cpu buffer of
> + * @cpu: The CPU buffer to be reset
> + */
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
> +{
> + struct ring_buffer_per_cpu *cpu_buffer;
> + int cpu;
>
> - rb_reset_cpu(cpu_buffer);
> + for_each_online_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
>
> - arch_spin_unlock(&cpu_buffer->lock);
> + atomic_inc(&cpu_buffer->resize_disabled);
> + atomic_inc(&cpu_buffer->record_disabled);
> + }
>
> - out:
> - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> + /* Make sure all commits have finished */
> + synchronize_rcu();
>
> - atomic_dec(&cpu_buffer->record_disabled);
> - atomic_dec(&cpu_buffer->resize_disabled);
> + for_each_online_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
> +
> + reset_disabled_cpu_buffer(cpu_buffer);
> +
> + atomic_dec(&cpu_buffer->record_disabled);
> + atomic_dec(&cpu_buffer->resize_disabled);
> + }
> }
> -EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>
> /**
> * ring_buffer_reset - reset a ring buffer
> @@ -4528,10 +4568,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
> */
> void ring_buffer_reset(struct trace_buffer *buffer)
> {
> + struct ring_buffer_per_cpu *cpu_buffer;
> int cpu;
>
> - for_each_buffer_cpu(buffer, cpu)
> - ring_buffer_reset_cpu(buffer, cpu);
> + for_each_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
> +
> + atomic_inc(&cpu_buffer->resize_disabled);
> + atomic_inc(&cpu_buffer->record_disabled);
> + }
> +
> + /* Make sure all commits have finished */
> + synchronize_rcu();
> +
> + for_each_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
> +
> + reset_disabled_cpu_buffer(cpu_buffer);
> +
> + atomic_dec(&cpu_buffer->record_disabled);
> + atomic_dec(&cpu_buffer->resize_disabled);
> + }
> }
> EXPORT_SYMBOL_GPL(ring_buffer_reset);
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index ec44b0e2a19c..9a26a1c875ae 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2003,7 +2003,6 @@ static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
> void tracing_reset_online_cpus(struct array_buffer *buf)
> {
> struct trace_buffer *buffer = buf->buffer;
> - int cpu;
>
> if (!buffer)
> return;
> @@ -2015,8 +2014,7 @@ void tracing_reset_online_cpus(struct array_buffer *buf)
>
> buf->time_start = buffer_ftrace_now(buf, buf->cpu);
>
> - for_each_online_cpu(cpu)
> - ring_buffer_reset_cpu(buffer, cpu);
> + ring_buffer_reset_online_cpus(buffer);
>
> ring_buffer_record_enable(buffer);
> }

2020-06-29 19:42:24

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU

On Thu, Jun 25, 2020 at 03:34:03PM +1000, Nicholas Piggin wrote:
> On a 144 thread system, `perf ftrace` takes about 20 seconds to start
> up, due to calling synchronize_rcu() for each CPU.
>
> cat /proc/108560/stack
> 0xc0003e7eb336f470
> __switch_to+0x2e0/0x480
> __wait_rcu_gp+0x20c/0x220
> synchronize_rcu+0x9c/0xc0
> ring_buffer_reset_cpu+0x88/0x2e0
> tracing_reset_online_cpus+0x84/0xe0
> tracing_open+0x1d4/0x1f0
>
> On a system with 10x more threads, it starts to become an annoyance.
>
> Batch these up so we disable all the per-cpu buffers first, then
> synchronize_rcu() once, then reset each of the buffers. This brings
> the time down to about 0.5s.
>
> Cc: Paul McKenney <[email protected]>
> Cc: Anton Blanchard <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: [email protected]
> Signed-off-by: Nicholas Piggin <[email protected]>

Looks plausible from an RCU viewpoint:

Acked-by: Paul E. McKenney <[email protected]>

> ---
> include/linux/ring_buffer.h | 1 +
> kernel/trace/ring_buffer.c | 85 +++++++++++++++++++++++++++++++------
> kernel/trace/trace.c | 4 +-
> 3 files changed, 73 insertions(+), 17 deletions(-)
>
> diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
> index c76b2f3b3ac4..136ea0997e6d 100644
> --- a/include/linux/ring_buffer.h
> +++ b/include/linux/ring_buffer.h
> @@ -143,6 +143,7 @@ bool ring_buffer_iter_dropped(struct ring_buffer_iter *iter);
> unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu);
>
> void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu);
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer);
> void ring_buffer_reset(struct trace_buffer *buffer);
>
> #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index b8e1ca48be50..3f1fd02bd14a 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -270,6 +270,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
> #define for_each_buffer_cpu(buffer, cpu) \
> for_each_cpu(cpu, buffer->cpumask)
>
> +#define for_each_online_buffer_cpu(buffer, cpu) \
> + for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
> +
> #define TS_SHIFT 27
> #define TS_MASK ((1ULL << TS_SHIFT) - 1)
> #define TS_DELTA_TEST (~TS_MASK)
> @@ -4484,6 +4487,26 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
> rb_head_page_activate(cpu_buffer);
> }
>
> +/* Must have disabled the cpu buffer then done a synchronize_rcu */
> +static void reset_disabled_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
> +{
> + unsigned long flags;
> +
> + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> +
> + if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
> + goto out;
> +
> + arch_spin_lock(&cpu_buffer->lock);
> +
> + rb_reset_cpu(cpu_buffer);
> +
> + arch_spin_unlock(&cpu_buffer->lock);
> +
> + out:
> + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> +}
> +
> /**
> * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
> * @buffer: The ring buffer to reset a per cpu buffer of
> @@ -4492,7 +4515,6 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
> void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
> {
> struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
> - unsigned long flags;
>
> if (!cpumask_test_cpu(cpu, buffer->cpumask))
> return;
> @@ -4503,24 +4525,42 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu)
> /* Make sure all commits have finished */
> synchronize_rcu();
>
> - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
> + reset_disabled_cpu_buffer(cpu_buffer);
>
> - if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
> - goto out;
> + atomic_dec(&cpu_buffer->record_disabled);
> + atomic_dec(&cpu_buffer->resize_disabled);
> +}
> +EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>
> - arch_spin_lock(&cpu_buffer->lock);
> +/**
> + * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
> + * @buffer: The ring buffer to reset a per cpu buffer of
> + * @cpu: The CPU buffer to be reset
> + */
> +void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
> +{
> + struct ring_buffer_per_cpu *cpu_buffer;
> + int cpu;
>
> - rb_reset_cpu(cpu_buffer);
> + for_each_online_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
>
> - arch_spin_unlock(&cpu_buffer->lock);
> + atomic_inc(&cpu_buffer->resize_disabled);
> + atomic_inc(&cpu_buffer->record_disabled);
> + }
>
> - out:
> - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
> + /* Make sure all commits have finished */
> + synchronize_rcu();
>
> - atomic_dec(&cpu_buffer->record_disabled);
> - atomic_dec(&cpu_buffer->resize_disabled);
> + for_each_online_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
> +
> + reset_disabled_cpu_buffer(cpu_buffer);
> +
> + atomic_dec(&cpu_buffer->record_disabled);
> + atomic_dec(&cpu_buffer->resize_disabled);
> + }
> }
> -EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
>
> /**
> * ring_buffer_reset - reset a ring buffer
> @@ -4528,10 +4568,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
> */
> void ring_buffer_reset(struct trace_buffer *buffer)
> {
> + struct ring_buffer_per_cpu *cpu_buffer;
> int cpu;
>
> - for_each_buffer_cpu(buffer, cpu)
> - ring_buffer_reset_cpu(buffer, cpu);
> + for_each_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
> +
> + atomic_inc(&cpu_buffer->resize_disabled);
> + atomic_inc(&cpu_buffer->record_disabled);
> + }
> +
> + /* Make sure all commits have finished */
> + synchronize_rcu();
> +
> + for_each_buffer_cpu(buffer, cpu) {
> + cpu_buffer = buffer->buffers[cpu];
> +
> + reset_disabled_cpu_buffer(cpu_buffer);
> +
> + atomic_dec(&cpu_buffer->record_disabled);
> + atomic_dec(&cpu_buffer->resize_disabled);
> + }
> }
> EXPORT_SYMBOL_GPL(ring_buffer_reset);
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index ec44b0e2a19c..9a26a1c875ae 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2003,7 +2003,6 @@ static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
> void tracing_reset_online_cpus(struct array_buffer *buf)
> {
> struct trace_buffer *buffer = buf->buffer;
> - int cpu;
>
> if (!buffer)
> return;
> @@ -2015,8 +2014,7 @@ void tracing_reset_online_cpus(struct array_buffer *buf)
>
> buf->time_start = buffer_ftrace_now(buf, buf->cpu);
>
> - for_each_online_cpu(cpu)
> - ring_buffer_reset_cpu(buffer, cpu);
> + ring_buffer_reset_online_cpus(buffer);
>
> ring_buffer_record_enable(buffer);
> }
> --
> 2.23.0
>

2020-06-29 20:31:09

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU

On Mon, 29 Jun 2020 08:35:11 -0700
"Paul E. McKenney" <[email protected]> wrote:

> Looks plausible from an RCU viewpoint:
>
> Acked-by: Paul E. McKenney <[email protected]>

Thanks Nicholas, Anton and Paul,

I'll pull this in and start testing it.

-- Steve

2020-06-29 22:18:11

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU

On Thu, 25 Jun 2020 15:34:03 +1000
Nicholas Piggin <[email protected]> wrote:

> Batch these up so we disable all the per-cpu buffers first, then
> synchronize_rcu() once, then reset each of the buffers. This brings
> the time down to about 0.5s.

After applying this patch, running tools/testing/selftests/ftracetest
went from 5 minutes and 35 seconds to 5 minutes 5 seconds to complete
on my 4 core (8 with hyperthreading) machine! That's almost a 10% drop!

Thanks, I'm definitely applying this for the next merge window.

-- Steve

2020-06-30 02:10:30

by Nicholas Piggin

[permalink] [raw]
Subject: Re: [PATCH] ring-buffer: speed up buffer resets by avoiding synchronize_rcu for each CPU

Excerpts from Steven Rostedt's message of June 30, 2020 8:16 am:
> On Thu, 25 Jun 2020 15:34:03 +1000
> Nicholas Piggin <[email protected]> wrote:
>
>> Batch these up so we disable all the per-cpu buffers first, then
>> synchronize_rcu() once, then reset each of the buffers. This brings
>> the time down to about 0.5s.
>
> After applying this patch, running tools/testing/selftests/ftracetest
> went from 5 minutes and 35 seconds to 5 minutes 5 seconds to complete
> on my 4 core (8 with hyperthreading) machine! That's almost a 10% drop!
>
> Thanks, I'm definitely applying this for the next merge window.

Cool, always good when a big system optimisation helps small ones as
well. Thanks all.

Thanks,
Nick