2023-02-28 17:59:54

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH 0/3] Improve trace/ring_buffer.c

This series improves ring_buffer.c by changing the type of some
static functions to void or bool and uses try_cmpxchg instead of
cmpxchg (*ptr, old, new) == old where appropriate.

Cc: Steven Rostedt <[email protected]>
Cc: Masami Hiramatsu <[email protected]>

Uros Bizjak (3):
ring_buffer: Change some static functions to void
ring_buffer: Change some static functions to bool
ring_buffer: Use try_cmpxchg instead of cmpxchg

kernel/trace/ring_buffer.c | 89 ++++++++++++++++----------------------
1 file changed, 37 insertions(+), 52 deletions(-)

--
2.39.2



2023-02-28 18:00:03

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH 1/3] ring_buffer: Change some static functions to void

The results of some static functions are not used. Change the
type of these functions to void and remove unnecessary returns.

No functional change intended.

Cc: Steven Rostedt <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Signed-off-by: Uros Bizjak <[email protected]>
---
kernel/trace/ring_buffer.c | 22 +++++++---------------
1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index af50d931b020..05fdc92554df 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1569,15 +1569,12 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
}
}

-static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
+static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *bpage)
{
unsigned long val = (unsigned long)bpage;

- if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
- return 1;
-
- return 0;
+ RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
}

/**
@@ -1587,30 +1584,28 @@ static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
* As a safety measure we check to make sure the data pages have not
* been corrupted.
*/
-static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
+static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *head = rb_list_head(cpu_buffer->pages);
struct list_head *tmp;

if (RB_WARN_ON(cpu_buffer,
rb_list_head(rb_list_head(head->next)->prev) != head))
- return -1;
+ return;

if (RB_WARN_ON(cpu_buffer,
rb_list_head(rb_list_head(head->prev)->next) != head))
- return -1;
+ return;

for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
if (RB_WARN_ON(cpu_buffer,
rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
- return -1;
+ return;

if (RB_WARN_ON(cpu_buffer,
rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
- return -1;
+ return;
}
-
- return 0;
}

static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
@@ -4500,7 +4495,6 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
default:
RB_WARN_ON(cpu_buffer, 1);
}
- return;
}

static void
@@ -4531,7 +4525,6 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
default:
RB_WARN_ON(iter->cpu_buffer, 1);
}
- return;
}

static struct buffer_page *
@@ -4946,7 +4939,6 @@ rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
{
if (likely(locked))
raw_spin_unlock(&cpu_buffer->reader_lock);
- return;
}

/**
--
2.39.2


2023-02-28 18:00:11

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH 2/3] ring_buffer: Change some static functions to bool

The return values of some functions are of boolean type. Change the
type of these functions to bool and adjust their return values. Also
change the type of some internal variables to bool.

No functional change intended.

Cc: Steven Rostedt <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Signed-off-by: Uros Bizjak <[email protected]>
---
kernel/trace/ring_buffer.c | 47 ++++++++++++++++++--------------------
1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 05fdc92554df..4188af7d4cfe 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -163,7 +163,7 @@ enum {
#define extended_time(event) \
(event->type_len >= RINGBUF_TYPE_TIME_EXTEND)

-static inline int rb_null_event(struct ring_buffer_event *event)
+static inline bool rb_null_event(struct ring_buffer_event *event)
{
return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
}
@@ -367,11 +367,9 @@ static void free_buffer_page(struct buffer_page *bpage)
/*
* We need to fit the time_stamp delta into 27 bits.
*/
-static inline int test_time_stamp(u64 delta)
+static inline bool test_time_stamp(u64 delta)
{
- if (delta & TS_DELTA_TEST)
- return 1;
- return 0;
+ return !!(delta & TS_DELTA_TEST);
}

#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
@@ -700,7 +698,7 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
return ret == expect;
}

-static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
+static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
{
unsigned long cnt, top, bottom, msb;
unsigned long cnt2, top2, bottom2, msb2;
@@ -1490,7 +1488,7 @@ rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer)
return NULL;
}

-static int rb_head_page_replace(struct buffer_page *old,
+static bool rb_head_page_replace(struct buffer_page *old,
struct buffer_page *new)
{
unsigned long *ptr = (unsigned long *)&old->list.prev->next;
@@ -1917,7 +1915,7 @@ static inline unsigned long rb_page_write(struct buffer_page *bpage)
return local_read(&bpage->write) & RB_WRITE_MASK;
}

-static int
+static bool
rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
{
struct list_head *tail_page, *to_remove, *next_page;
@@ -2030,12 +2028,13 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
return nr_removed == 0;
}

-static int
+static bool
rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
struct list_head *pages = &cpu_buffer->new_pages;
- int retries, success;
+ int retries;
unsigned long flags;
+ bool success;

/* Can be called at early boot up, where interrupts must not been enabled */
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2054,7 +2053,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
* spinning.
*/
retries = 10;
- success = 0;
+ success = false;
while (retries--) {
struct list_head *head_page, *prev_page, *r;
struct list_head *last_page, *first_page;
@@ -2083,7 +2082,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
* pointer to point to end of list
*/
head_page->prev = last_page;
- success = 1;
+ success = true;
break;
}
}
@@ -2111,7 +2110,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)

static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
- int success;
+ bool success;

if (cpu_buffer->nr_pages_to_update > 0)
success = rb_insert_pages(cpu_buffer);
@@ -2994,7 +2993,7 @@ static u64 rb_time_delta(struct ring_buffer_event *event)
}
}

-static inline int
+static inline bool
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
{
@@ -3015,7 +3014,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
delta = rb_time_delta(event);

if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp))
- return 0;
+ return false;

/* Make sure the write stamp is read before testing the location */
barrier();
@@ -3028,7 +3027,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
/* Something came in, can't discard */
if (!rb_time_cmpxchg(&cpu_buffer->write_stamp,
write_stamp, write_stamp - delta))
- return 0;
+ return false;

/*
* It's possible that the event time delta is zero
@@ -3061,12 +3060,12 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
if (index == old_index) {
/* update counters */
local_sub(event_length, &cpu_buffer->entries_bytes);
- return 1;
+ return true;
}
}

/* could not discard */
- return 0;
+ return false;
}

static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
@@ -3281,7 +3280,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* Note: The TRANSITION bit only handles a single transition between context.
*/

-static __always_inline int
+static __always_inline bool
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
unsigned int val = cpu_buffer->current_context;
@@ -3298,14 +3297,14 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
bit = RB_CTX_TRANSITION;
if (val & (1 << (bit + cpu_buffer->nest))) {
do_ring_buffer_record_recursion();
- return 1;
+ return true;
}
}

val |= (1 << (bit + cpu_buffer->nest));
cpu_buffer->current_context = val;

- return 0;
+ return false;
}

static __always_inline void
@@ -5408,9 +5407,8 @@ bool ring_buffer_empty(struct trace_buffer *buffer)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long flags;
- bool dolock;
+ bool dolock, ret;
int cpu;
- int ret;

/* yes this is racy, but if you don't like the race, lock the buffer */
for_each_buffer_cpu(buffer, cpu) {
@@ -5438,8 +5436,7 @@ bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long flags;
- bool dolock;
- int ret;
+ bool dolock, ret;

if (!cpumask_test_cpu(cpu, buffer->cpumask))
return true;
--
2.39.2


2023-02-28 18:00:19

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old.
x86 CMPXCHG instruction returns success in ZF flag, so this change
saves a compare after cmpxchg (and related move instruction in
front of cmpxchg).

Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
fails. There is no need to re-read the value in the loop.

No functional change intended.

Cc: Steven Rostedt <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Signed-off-by: Uros Bizjak <[email protected]>
---
kernel/trace/ring_buffer.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4188af7d4cfe..8f0ef7d12ddd 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1493,14 +1493,11 @@ static bool rb_head_page_replace(struct buffer_page *old,
{
unsigned long *ptr = (unsigned long *)&old->list.prev->next;
unsigned long val;
- unsigned long ret;

val = *ptr & ~RB_FLAG_MASK;
val |= RB_PAGE_HEAD;

- ret = cmpxchg(ptr, val, (unsigned long)&new->list);
-
- return ret == val;
+ return try_cmpxchg(ptr, &val, (unsigned long)&new->list);
}

/*
@@ -2055,7 +2052,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
retries = 10;
success = false;
while (retries--) {
- struct list_head *head_page, *prev_page, *r;
+ struct list_head *head_page, *prev_page;
struct list_head *last_page, *first_page;
struct list_head *head_page_with_bit;

@@ -2073,9 +2070,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
last_page->next = head_page_with_bit;
first_page->prev = prev_page;

- r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
-
- if (r == head_page_with_bit) {
+ if (try_cmpxchg(&prev_page->next,
+ &head_page_with_bit, first_page)) {
/*
* yay, we replaced the page pointer to our new list,
* now, we just have to update to head page's prev
@@ -4061,10 +4057,10 @@ void ring_buffer_record_off(struct trace_buffer *buffer)
unsigned int rd;
unsigned int new_rd;

+ rd = atomic_read(&buffer->record_disabled);
do {
- rd = atomic_read(&buffer->record_disabled);
new_rd = rd | RB_BUFFER_OFF;
- } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
+ } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd));
}
EXPORT_SYMBOL_GPL(ring_buffer_record_off);

@@ -4084,10 +4080,10 @@ void ring_buffer_record_on(struct trace_buffer *buffer)
unsigned int rd;
unsigned int new_rd;

+ rd = atomic_read(&buffer->record_disabled);
do {
- rd = atomic_read(&buffer->record_disabled);
new_rd = rd & ~RB_BUFFER_OFF;
- } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
+ } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd));
}
EXPORT_SYMBOL_GPL(ring_buffer_record_on);

--
2.39.2


2023-02-28 19:35:56

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 0/3] Improve trace/ring_buffer.c

On Tue, 28 Feb 2023 18:59:26 +0100
Uros Bizjak <[email protected]> wrote:

> This series improves ring_buffer.c by changing the type of some
> static functions to void or bool and

Well, it's not really an improvement unless it has a functional change. But
I'm OK with taking these.

> uses try_cmpxchg instead of
> cmpxchg (*ptr, old, new) == old where appropriate.

Now, the try_cmpxchg() could be an improvement on x86.

-- Steve

>
> Cc: Steven Rostedt <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
>
> Uros Bizjak (3):
> ring_buffer: Change some static functions to void
> ring_buffer: Change some static functions to bool
> ring_buffer: Use try_cmpxchg instead of cmpxchg
>
> kernel/trace/ring_buffer.c | 89 ++++++++++++++++----------------------
> 1 file changed, 37 insertions(+), 52 deletions(-)
>


2023-02-28 21:45:32

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Tue, 28 Feb 2023 18:59:29 +0100
Uros Bizjak <[email protected]> wrote:

> Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old.
> x86 CMPXCHG instruction returns success in ZF flag, so this change
> saves a compare after cmpxchg (and related move instruction in
> front of cmpxchg).
>
> Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
> fails. There is no need to re-read the value in the loop.
>
> No functional change intended.

As I mentioned in the RCU thread, I have issues with some of the changes
here.

>
> Cc: Steven Rostedt <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Signed-off-by: Uros Bizjak <[email protected]>
> ---
> kernel/trace/ring_buffer.c | 20 ++++++++------------
> 1 file changed, 8 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 4188af7d4cfe..8f0ef7d12ddd 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -1493,14 +1493,11 @@ static bool rb_head_page_replace(struct buffer_page *old,
> {
> unsigned long *ptr = (unsigned long *)&old->list.prev->next;
> unsigned long val;
> - unsigned long ret;
>
> val = *ptr & ~RB_FLAG_MASK;
> val |= RB_PAGE_HEAD;
>
> - ret = cmpxchg(ptr, val, (unsigned long)&new->list);
> -
> - return ret == val;
> + return try_cmpxchg(ptr, &val, (unsigned long)&new->list);

No, val should not be updated.

> }
>
> /*
> @@ -2055,7 +2052,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> retries = 10;
> success = false;
> while (retries--) {
> - struct list_head *head_page, *prev_page, *r;
> + struct list_head *head_page, *prev_page;
> struct list_head *last_page, *first_page;
> struct list_head *head_page_with_bit;
>
> @@ -2073,9 +2070,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> last_page->next = head_page_with_bit;
> first_page->prev = prev_page;
>
> - r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
> -
> - if (r == head_page_with_bit) {
> + if (try_cmpxchg(&prev_page->next,
> + &head_page_with_bit, first_page)) {

No. head_page_with_bit should not be updated.

> /*
> * yay, we replaced the page pointer to our new list,
> * now, we just have to update to head page's prev
> @@ -4061,10 +4057,10 @@ void ring_buffer_record_off(struct trace_buffer *buffer)
> unsigned int rd;
> unsigned int new_rd;
>
> + rd = atomic_read(&buffer->record_disabled);
> do {
> - rd = atomic_read(&buffer->record_disabled);
> new_rd = rd | RB_BUFFER_OFF;
> - } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
> + } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd));

This change is OK.

> }
> EXPORT_SYMBOL_GPL(ring_buffer_record_off);
>
> @@ -4084,10 +4080,10 @@ void ring_buffer_record_on(struct trace_buffer *buffer)
> unsigned int rd;
> unsigned int new_rd;
>
> + rd = atomic_read(&buffer->record_disabled);
> do {
> - rd = atomic_read(&buffer->record_disabled);
> new_rd = rd & ~RB_BUFFER_OFF;
> - } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
> + } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd));

And so is this one.

So I will not take this patch as is.

-- Steve

> }
> EXPORT_SYMBOL_GPL(ring_buffer_record_on);
>


2023-02-28 22:55:30

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/3] ring_buffer: Change some static functions to void

On Tue, 28 Feb 2023 18:59:27 +0100
Uros Bizjak <[email protected]> wrote:

> The results of some static functions are not used. Change the
> type of these function to void and remove unnecessary returns.
>
> No functional change intended.

NAK, instead of dropping the errors, please handle it on the caller side.

Thank you,

>
> Cc: Steven Rostedt <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Signed-off-by: Uros Bizjak <[email protected]>
> ---
> kernel/trace/ring_buffer.c | 22 +++++++---------------
> 1 file changed, 7 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index af50d931b020..05fdc92554df 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -1569,15 +1569,12 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
> }
> }
>
> -static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
> +static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
> struct buffer_page *bpage)
> {
> unsigned long val = (unsigned long)bpage;
>
> - if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK))
> - return 1;
> -
> - return 0;
> + RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK);
> }
>
> /**
> @@ -1587,30 +1584,28 @@ static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
> * As a safety measure we check to make sure the data pages have not
> * been corrupted.
> */
> -static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
> +static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
> {
> struct list_head *head = rb_list_head(cpu_buffer->pages);
> struct list_head *tmp;
>
> if (RB_WARN_ON(cpu_buffer,
> rb_list_head(rb_list_head(head->next)->prev) != head))
> - return -1;
> + return;
>
> if (RB_WARN_ON(cpu_buffer,
> rb_list_head(rb_list_head(head->prev)->next) != head))
> - return -1;
> + return;
>
> for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) {
> if (RB_WARN_ON(cpu_buffer,
> rb_list_head(rb_list_head(tmp->next)->prev) != tmp))
> - return -1;
> + return;
>
> if (RB_WARN_ON(cpu_buffer,
> rb_list_head(rb_list_head(tmp->prev)->next) != tmp))
> - return -1;
> + return;
> }
> -
> - return 0;
> }
>
> static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
> @@ -4500,7 +4495,6 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
> default:
> RB_WARN_ON(cpu_buffer, 1);
> }
> - return;
> }
>
> static void
> @@ -4531,7 +4525,6 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
> default:
> RB_WARN_ON(iter->cpu_buffer, 1);
> }
> - return;
> }
>
> static struct buffer_page *
> @@ -4946,7 +4939,6 @@ rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
> {
> if (likely(locked))
> raw_spin_unlock(&cpu_buffer->reader_lock);
> - return;
> }
>
> /**
> --
> 2.39.2
>


--
Masami Hiramatsu (Google) <[email protected]>

2023-03-01 08:36:04

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 0/3] Improve trace/ring_buffer.c

On Tue, Feb 28, 2023 at 8:35 PM Steven Rostedt <[email protected]> wrote:
>
> On Tue, 28 Feb 2023 18:59:26 +0100
> Uros Bizjak <[email protected]> wrote:
>
> > This series improves ring_buffer.c by changing the type of some
> > static functions to void or bool and
>
> Well, it's not really an improvement unless it has a functional change. But
> I'm OK with taking these.

"No functional changes intended" claim should be taken in the sense
that the same functionality is achieved in a more optimal way. As a
trivial example, changing some functions to bool eliminates many
unnecessary zero-extensions and results in smaller code:

size ring_buffer-*.o
text data bss dec hex filename
25599 1762 4 27365 6ae5 ring_buffer-vanilla.o
25551 1762 4 27317 6ab5 ring_buffer-bool.o

Please also note that setting the output value with "SETcc r8" results
in a partial register stall on x86 when returning r32. The compiler
knows how to handle this issue (using register dependency breaking XOR
in front of SETcc), but it is better to avoid the issue altogether.
So, there are also secondary benefits of the above "No functional
changes intended" change, besides lower code size.

> > uses try_cmpxchg instead of
> > cmpxchg (*ptr, old, new) == old where appropriate.
>
> Now, the try_cmpxchg() could be an improvement on x86.

True, the concrete example is e.g. ring_buffer_record_off, where the
cmpxchg loop improves from:

78: 8b 57 08 mov 0x8(%rdi),%edx
7b: 89 d6 mov %edx,%esi
7d: 89 d0 mov %edx,%eax
7f: 81 ce 00 00 10 00 or $0x100000,%esi
85: f0 0f b1 31 lock cmpxchg %esi,(%rcx)
89: 39 d0 cmp %edx,%eax
8b: 75 eb jne 78 <ring_buffer_record_off+0x8>

to:

1bb: 89 c2 mov %eax,%edx
1bd: 81 ca 00 00 10 00 or $0x100000,%edx
1c3: f0 0f b1 17 lock cmpxchg %edx,(%rdi)
1c7: 75 f2 jne 1bb <ring_buffer_record_off+0xb>

As can be demonstrated above, the move to a temporary reg and the
comparison with said temporary are both eliminated.

The above code is generated with gcc-10.3.1 to show the direct
benefits of the change. gcc-12+ is able to use likely/unlikely
annotations inside try_cmpxchg definition and generates fast paths
through the code (as mentioned by you in the RCU patch-set thread).

Please also note that try_cmpxchg generates better code also for
targets that do not define try_cmpxchg and use fallback code, as
reported in [1] and follow-up messages.

The API of try_cmpxhg is written in such a way that benefits loops and
also linear code. I will discuss this in the reply of PATCH 3/3.

[1] https://lore.kernel.org/lkml/[email protected]/

Uros.

2023-03-01 08:47:07

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 1/3] ring_buffer: Change some static functions to void

On Tue, Feb 28, 2023 at 11:55 PM Masami Hiramatsu <[email protected]> wrote:
>
> On Tue, 28 Feb 2023 18:59:27 +0100
> Uros Bizjak <[email protected]> wrote:
>
> > The results of some static functions are not used. Change the
> > type of these function to void and remove unnecessary returns.
> >
> > No functional change intended.
>
> NAK, instead of dropping the errors, please handle it on the caller side.

I was under the impression that the intention of these two functions
is to warn if there is any corruption in data pages detected. Please
note that the patch has no effect on code size, as the compiler is
smart enough to drop unused return values by itself. So, the change is
mostly cosmetic as I was just bothered by unused returns. I'm not
versed enough in the code to introduce additional error handling, so
considering its minimal impact, the patch can just be dropped.

Uros.

2023-03-01 09:37:46

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Tue, Feb 28, 2023 at 10:43 PM Steven Rostedt <[email protected]> wrote:
>
> On Tue, 28 Feb 2023 18:59:29 +0100
> Uros Bizjak <[email protected]> wrote:
>
> > Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old.
> > x86 CMPXCHG instruction returns success in ZF flag, so this change
> > saves a compare after cmpxchg (and related move instruction in
> > front of cmpxchg).
> >
> > Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
> > fails. There is no need to re-read the value in the loop.
> >
> > No functional change intended.
>
> As I mentioned in the RCU thread, I have issues with some of the changes
> here.
>
> >
> > Cc: Steven Rostedt <[email protected]>
> > Cc: Masami Hiramatsu <[email protected]>
> > Signed-off-by: Uros Bizjak <[email protected]>
> > ---
> > kernel/trace/ring_buffer.c | 20 ++++++++------------
> > 1 file changed, 8 insertions(+), 12 deletions(-)
> >
> > diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> > index 4188af7d4cfe..8f0ef7d12ddd 100644
> > --- a/kernel/trace/ring_buffer.c
> > +++ b/kernel/trace/ring_buffer.c
> > @@ -1493,14 +1493,11 @@ static bool rb_head_page_replace(struct buffer_page *old,
> > {
> > unsigned long *ptr = (unsigned long *)&old->list.prev->next;
> > unsigned long val;
> > - unsigned long ret;
> >
> > val = *ptr & ~RB_FLAG_MASK;
> > val |= RB_PAGE_HEAD;
> >
> > - ret = cmpxchg(ptr, val, (unsigned long)&new->list);
> > -
> > - return ret == val;
> > + return try_cmpxchg(ptr, &val, (unsigned long)&new->list);
>
> No, val should not be updated.

Please see the definition of try_cmpxchg. The definition is written in
such a way that benefits loops as well as linear code and in the later
case depends on the compiler to eliminate assignment to val as a dead
assignment.

The above change was done under the assumption that val is unused
after try_cmpxchg, and can be considered as a temporary
[Alternatively, the value could be copied to a local temporary and a
pointer to this local temporary could be passed to try_cmpxchg
instead. Compiler is smart enough to eliminate the assignment in any
case.]

Even in the linear code, the change has considerable effect.
rb_head_page_replace is inlined in rb_get_reader_page and gcc-10.3.1
improves code from:

ef8: 48 8b 0e mov (%rsi),%rcx
efb: 48 83 e1 fc and $0xfffffffffffffffc,%rcx
eff: 48 83 c9 01 or $0x1,%rcx
f03: 48 89 c8 mov %rcx,%rax
f06: f0 48 0f b1 3e lock cmpxchg %rdi,(%rsi)
f0b: 48 39 c1 cmp %rax,%rcx
f0e: 74 3b je f4b <rb_get_reader_page+0x13b>

to:

ed8: 48 8b 01 mov (%rcx),%rax
edb: 48 83 e0 fc and $0xfffffffffffffffc,%rax
edf: 48 83 c8 01 or $0x1,%rax
ee3: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
ee8: 74 3b je f25 <rb_get_reader_page+0x135>

Again, even in linear code the change is able to eliminate the
assignment to a temporary reg and the compare. Please note that there
is no move *from* %rax register after cmpxchg, so the compiler
correctly eliminated unused assignment.

>
> > }
> >
> > /*
> > @@ -2055,7 +2052,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> > retries = 10;
> > success = false;
> > while (retries--) {
> > - struct list_head *head_page, *prev_page, *r;
> > + struct list_head *head_page, *prev_page;
> > struct list_head *last_page, *first_page;
> > struct list_head *head_page_with_bit;
> >
> > @@ -2073,9 +2070,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> > last_page->next = head_page_with_bit;
> > first_page->prev = prev_page;
> >
> > - r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
> > -
> > - if (r == head_page_with_bit) {
> > + if (try_cmpxchg(&prev_page->next,
> > + &head_page_with_bit, first_page)) {
>
> No. head_page_with_bit should not be updated.

As above, head_page_with_bit should be considered as a temporary, it
is initialized a couple of lines above cmpxchg and unused after. The
gcc-10.3.1 compiler even found some more optimization opportunities
and reordered the code from:

1364: 4d 8b 86 38 01 00 00 mov 0x138(%r14),%r8
136b: 48 83 ce 01 or $0x1,%rsi
136f: 48 89 f0 mov %rsi,%rax
1372: 49 89 30 mov %rsi,(%r8)
1375: 48 89 4f 08 mov %rcx,0x8(%rdi)
1379: f0 48 0f b1 39 lock cmpxchg %rdi,(%rcx)
137e: 48 39 c6 cmp %rax,%rsi
1381: 74 78 je 13fb <rb_insert_pages+0xdb>

to:

1343: 48 83 c8 01 or $0x1,%rax
1347: 48 8b bb 38 01 00 00 mov 0x138(%rbx),%rdi
134e: 48 89 07 mov %rax,(%rdi)
1351: 48 89 4e 08 mov %rcx,0x8(%rsi)
1355: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
135a: 41 0f 94 c7 sete %r15b
135e: 75 2f jne 138f <rb_insert_pages+0x8f>

Please also note SETE insn in the above code, this is how the
"success" variable is handled in the loop. So, besides code size
improvement, other secondary improvements can be expected from the
change, too.

I think that the above examples demonstrate various improvements that
can be achieved with effectively a one-line, almost mechanical change
to the code, even in linear code. It would be unfortunate to not
consider them.

Uros.

2023-03-01 15:49:35

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, Mar 1, 2023 at 4:37 AM Uros Bizjak <[email protected]> wrote:
>
> On Tue, Feb 28, 2023 at 10:43 PM Steven Rostedt <[email protected]> wrote:
> >
> > On Tue, 28 Feb 2023 18:59:29 +0100
> > Uros Bizjak <[email protected]> wrote:
> >
> > > Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old.
> > > x86 CMPXCHG instruction returns success in ZF flag, so this change
> > > saves a compare after cmpxchg (and related move instruction in
> > > front of cmpxchg).
> > >
> > > Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
> > > fails. There is no need to re-read the value in the loop.
> > >
> > > No functional change intended.
> >
> > As I mentioned in the RCU thread, I have issues with some of the changes
> > here.
> >
> > >
> > > Cc: Steven Rostedt <[email protected]>
> > > Cc: Masami Hiramatsu <[email protected]>
> > > Signed-off-by: Uros Bizjak <[email protected]>
> > > ---
> > > kernel/trace/ring_buffer.c | 20 ++++++++------------
> > > 1 file changed, 8 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> > > index 4188af7d4cfe..8f0ef7d12ddd 100644
> > > --- a/kernel/trace/ring_buffer.c
> > > +++ b/kernel/trace/ring_buffer.c
> > > @@ -1493,14 +1493,11 @@ static bool rb_head_page_replace(struct buffer_page *old,
> > > {
> > > unsigned long *ptr = (unsigned long *)&old->list.prev->next;
> > > unsigned long val;
> > > - unsigned long ret;
> > >
> > > val = *ptr & ~RB_FLAG_MASK;
> > > val |= RB_PAGE_HEAD;
> > >
> > > - ret = cmpxchg(ptr, val, (unsigned long)&new->list);
> > > -
> > > - return ret == val;
> > > + return try_cmpxchg(ptr, &val, (unsigned long)&new->list);
> >
> > No, val should not be updated.
>
> Please see the definition of try_cmpxchg. The definition is written in
> such a way that benefits loops as well as linear code and in the later
> case depends on the compiler to eliminate assignment to val as a dead
> assignment.
>
> The above change was done under the assumption that val is unused
> after try_cmpxchg, and can be considered as a temporary
> [Alternatively, the value could be copied to a local temporary and a
> pointer to this local temporary could be passed to try_cmpxchg
> instead. Compiler is smart enough to eliminate the assignment in any
> case.]

If I understood Steve correctly, I think the "misleading" part is that
you are passing a variable by address to try_cmpxchg() but not really
modifying it (unlike in the loop patterns).

Perhaps it is more meaningful to have an API that looks like:
bool cmpxchg_succeeded(TYPE ptr, TYPE old, TYPE new)
Where old is not a pointer (unlike try_cmpxchg), and the API returns bool.

Both cleaner to read and has the optimization you want, I believe.

However, the other point is, this is useful only for slow paths, but
at least cmpxchg_succeeded() is more readable and less "misleading"
than try_cmpxchg() IMO.

- Joel

[...]

2023-03-01 15:50:55

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, Mar 1, 2023 at 10:49 AM Joel Fernandes <[email protected]> wrote:
>
> On Wed, Mar 1, 2023 at 4:37 AM Uros Bizjak <[email protected]> wrote:
> >
> > On Tue, Feb 28, 2023 at 10:43 PM Steven Rostedt <[email protected]> wrote:
> > >
> > > On Tue, 28 Feb 2023 18:59:29 +0100
> > > Uros Bizjak <[email protected]> wrote:
> > >
> > > > Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old.
> > > > x86 CMPXCHG instruction returns success in ZF flag, so this change
> > > > saves a compare after cmpxchg (and related move instruction in
> > > > front of cmpxchg).
> > > >
> > > > Also, try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg
> > > > fails. There is no need to re-read the value in the loop.
> > > >
> > > > No functional change intended.
> > >
> > > As I mentioned in the RCU thread, I have issues with some of the changes
> > > here.
> > >
> > > >
> > > > Cc: Steven Rostedt <[email protected]>
> > > > Cc: Masami Hiramatsu <[email protected]>
> > > > Signed-off-by: Uros Bizjak <[email protected]>
> > > > ---
> > > > kernel/trace/ring_buffer.c | 20 ++++++++------------
> > > > 1 file changed, 8 insertions(+), 12 deletions(-)
> > > >
> > > > diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> > > > index 4188af7d4cfe..8f0ef7d12ddd 100644
> > > > --- a/kernel/trace/ring_buffer.c
> > > > +++ b/kernel/trace/ring_buffer.c
> > > > @@ -1493,14 +1493,11 @@ static bool rb_head_page_replace(struct buffer_page *old,
> > > > {
> > > > unsigned long *ptr = (unsigned long *)&old->list.prev->next;
> > > > unsigned long val;
> > > > - unsigned long ret;
> > > >
> > > > val = *ptr & ~RB_FLAG_MASK;
> > > > val |= RB_PAGE_HEAD;
> > > >
> > > > - ret = cmpxchg(ptr, val, (unsigned long)&new->list);
> > > > -
> > > > - return ret == val;
> > > > + return try_cmpxchg(ptr, &val, (unsigned long)&new->list);
> > >
> > > No, val should not be updated.
> >
> > Please see the definition of try_cmpxchg. The definition is written in
> > such a way that benefits loops as well as linear code and in the latter
> > case depends on the compiler to eliminate assignment to val as a dead
> > assignment.
> >
> > The above change was done under the assumption that val is unused
> > after try_cmpxchg, and can be considered as a temporary
> > [Alternatively, the value could be copied to a local temporary and a
> > pointer to this local temporary could be passed to try_cmpxchg
> > instead. Compiler is smart enough to eliminate the assignment in any
> > case.]

Ah I need to be more careful how I type.

> If I understood Steve correctly, I think the "misleading" part is that
> you are passing a variable by address to try_cmpxchg() but not really
> modifying it (unlike in the loop patterns).

It does modify it, but I meant it does not use it.

> Perhaps it is more meaningful to have an API that looks like:
> bool cmpxchg_succeeded(TYPE ptr, TYPE old, TYPE new)
> Where old is not a pointer (unlike try_cmpxchg), and the API returns bool.
>
> Both cleaner to read and has the optimization you want, I believe.
>
> However, the other point is, this is useful only for slow paths, but

Useful only for fast paths...

> at least cmpxchg_succeeded() is more readable and less "misleading"
> than try_cmpxchg() IMO.
>

Proofreading emails properly from here on! Not after the fact!

- Joel

2023-03-01 16:19:03

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, 1 Mar 2023 10:37:28 +0100
Uros Bizjak <[email protected]> wrote:

> > No, val should not be updated.
>
> Please see the definition of try_cmpxchg. The definition is written in
> such a way that benefits loops as well as linear code and in the latter
> case depends on the compiler to eliminate assignment to val as a dead
> assignment.

I did read it ;-)

>
> The above change was done under the assumption that val is unused
> after try_cmpxchg, and can be considered as a temporary
> [Alternatively, the value could be copied to a local temporary and a
> pointer to this local temporary could be passed to try_cmpxchg
> instead. Compiler is smart enough to eliminate the assignment in any
> case.]
>
> Even in the linear code, the change has considerable effect.
> rb_head_page_replace is inlined in rb_get_reader_page and gcc-10.3.1
> improves code from:
>
> ef8: 48 8b 0e mov (%rsi),%rcx
> efb: 48 83 e1 fc and $0xfffffffffffffffc,%rcx
> eff: 48 83 c9 01 or $0x1,%rcx
> f03: 48 89 c8 mov %rcx,%rax
> f06: f0 48 0f b1 3e lock cmpxchg %rdi,(%rsi)
> f0b: 48 39 c1 cmp %rax,%rcx
> f0e: 74 3b je f4b <rb_get_reader_page+0x13b>
>
> to:
>
> ed8: 48 8b 01 mov (%rcx),%rax
> edb: 48 83 e0 fc and $0xfffffffffffffffc,%rax
> edf: 48 83 c8 01 or $0x1,%rax
> ee3: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
> ee8: 74 3b je f25 <rb_get_reader_page+0x135>

I'm using gcc 12.2.0 and have this;

cmpxchg:

0000000000000e50 <rb_get_reader_page>:
e50: 41 55 push %r13
e52: 41 54 push %r12
e54: 55 push %rbp
e55: 53 push %rbx
e56: 48 89 fb mov %rdi,%rbx
e59: 9c pushf
e5a: 5d pop %rbp
e5b: fa cli
e5c: 81 e5 00 02 00 00 and $0x200,%ebp
e62: 0f 85 e6 01 00 00 jne 104e <rb_get_reader_page+0x1fe>
e68: 48 8d 7b 1c lea 0x1c(%rbx),%rdi
e6c: 31 c0 xor %eax,%eax
e6e: ba 01 00 00 00 mov $0x1,%edx
e73: f0 0f b1 53 1c lock cmpxchg %edx,0x1c(%rbx)
e78: 0f 85 e5 01 00 00 jne 1063 <rb_get_reader_page+0x213>
e7e: 41 bc 03 00 00 00 mov $0x3,%r12d
e84: 4c 8b 6b 58 mov 0x58(%rbx),%r13
e88: 49 8b 55 30 mov 0x30(%r13),%rdx
e8c: 41 8b 45 18 mov 0x18(%r13),%eax
e90: 48 8b 4a 08 mov 0x8(%rdx),%rcx
e94: 39 c8 cmp %ecx,%eax
e96: 0f 82 61 01 00 00 jb ffd <rb_get_reader_page+0x1ad>
e9c: 48 8b 52 08 mov 0x8(%rdx),%rdx


try_cmpxchg:

0000000000000e70 <rb_get_reader_page>:
e70: 41 55 push %r13
e72: 41 54 push %r12
e74: 55 push %rbp
e75: 53 push %rbx
e76: 48 89 fb mov %rdi,%rbx
e79: 9c pushf
e7a: 5d pop %rbp
e7b: fa cli
e7c: 81 e5 00 02 00 00 and $0x200,%ebp
e82: 0f 85 e0 01 00 00 jne 1068 <rb_get_reader_page+0x1f8>
e88: 48 8d 7b 1c lea 0x1c(%rbx),%rdi
e8c: 31 c0 xor %eax,%eax
e8e: ba 01 00 00 00 mov $0x1,%edx
e93: f0 0f b1 53 1c lock cmpxchg %edx,0x1c(%rbx)
e98: 0f 85 df 01 00 00 jne 107d <rb_get_reader_page+0x20d>
e9e: 41 bc 03 00 00 00 mov $0x3,%r12d
ea4: 4c 8b 6b 58 mov 0x58(%rbx),%r13
ea8: 49 8b 55 30 mov 0x30(%r13),%rdx
eac: 41 8b 45 18 mov 0x18(%r13),%eax
eb0: 48 8b 4a 08 mov 0x8(%rdx),%rcx
eb4: 39 c8 cmp %ecx,%eax
eb6: 0f 82 5b 01 00 00 jb 1017 <rb_get_reader_page+0x1a7>
ebc: 48 8b 52 08 mov 0x8(%rdx),%rdx

Which has no difference :-/


>
> Again, even in linear code the change is able to eliminate the
> assignment to a temporary reg and the compare. Please note that there
> is no move *from* %rax register after cmpxchg, so the compiler
> correctly eliminated unused assignment.
>
> >
> > > }
> > >
> > > /*
> > > @@ -2055,7 +2052,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> > > retries = 10;
> > > success = false;
> > > while (retries--) {
> > > - struct list_head *head_page, *prev_page, *r;
> > > + struct list_head *head_page, *prev_page;
> > > struct list_head *last_page, *first_page;
> > > struct list_head *head_page_with_bit;
> > >
> > > @@ -2073,9 +2070,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> > > last_page->next = head_page_with_bit;
> > > first_page->prev = prev_page;
> > >
> > > - r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
> > > -
> > > - if (r == head_page_with_bit) {
> > > + if (try_cmpxchg(&prev_page->next,
> > > + &head_page_with_bit, first_page)) {
> >
> > No. head_page_with_bit should not be updated.
>
> As above, head_page_with_bit should be considered as a temporary, it
> is initialized a couple of lines above cmpxchg and unused after. The
> gcc-10.3.1 compiler even found some more optimization opportunities
> and reordered the code from:
>
> 1364: 4d 8b 86 38 01 00 00 mov 0x138(%r14),%r8
> 136b: 48 83 ce 01 or $0x1,%rsi
> 136f: 48 89 f0 mov %rsi,%rax
> 1372: 49 89 30 mov %rsi,(%r8)
> 1375: 48 89 4f 08 mov %rcx,0x8(%rdi)
> 1379: f0 48 0f b1 39 lock cmpxchg %rdi,(%rcx)
> 137e: 48 39 c6 cmp %rax,%rsi
> 1381: 74 78 je 13fb <rb_insert_pages+0xdb>
>
> to:
>
> 1343: 48 83 c8 01 or $0x1,%rax
> 1347: 48 8b bb 38 01 00 00 mov 0x138(%rbx),%rdi
> 134e: 48 89 07 mov %rax,(%rdi)
> 1351: 48 89 4e 08 mov %rcx,0x8(%rsi)
> 1355: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
> 135a: 41 0f 94 c7 sete %r15b
> 135e: 75 2f jne 138f <rb_insert_pages+0x8f>
>
> Please also note SETE insn in the above code, this is how the
> "success" variable is handled in the loop. So, besides code size
> improvement, other secondary improvements can be expected from the
> change, too.

For this gcc 12.2.0 did have a different result:

cmpxchg:

1436: 49 89 c5 mov %rax,%r13
1439: eb 41 jmp 147c <rb_update_pages+0x7c>
143b: 48 89 df mov %rbx,%rdi
143e: e8 bd ed ff ff call 200 <rb_set_head_page>
1443: 48 89 c2 mov %rax,%rdx
1446: 48 85 c0 test %rax,%rax
1449: 74 37 je 1482 <rb_update_pages+0x82>
144b: 48 8b 48 08 mov 0x8(%rax),%rcx
144f: 48 8b bb 30 01 00 00 mov 0x130(%rbx),%rdi
1456: 48 89 c6 mov %rax,%rsi
1459: 4c 8b 83 38 01 00 00 mov 0x138(%rbx),%r8
1460: 48 83 ce 01 or $0x1,%rsi
1464: 48 89 f0 mov %rsi,%rax
1467: 49 89 30 mov %rsi,(%r8)
146a: 48 89 4f 08 mov %rcx,0x8(%rdi)
146e: f0 48 0f b1 39 lock cmpxchg %rdi,(%rcx)
1473: 48 39 c6 cmp %rax,%rsi
1476: 0f 84 97 01 00 00 je 1613 <rb_update_pages+0x213>
147c: 41 83 ee 01 sub $0x1,%r14d
1480: 75 b9 jne 143b <rb_update_pages+0x3b>
1482: 48 8b 43 10 mov 0x10(%rbx),%rax
1486: f0 ff 40 08 lock incl 0x8(%rax)

try_cmpxchg:

1446: 49 89 c4 mov %rax,%r12
1449: 41 83 ee 01 sub $0x1,%r14d
144d: 0f 84 7b 01 00 00 je 15ce <rb_update_pages+0x1be>
1453: 48 89 df mov %rbx,%rdi
1456: e8 c5 ed ff ff call 220 <rb_set_head_page>
145b: 48 89 c2 mov %rax,%rdx
145e: 48 85 c0 test %rax,%rax
1461: 0f 84 67 01 00 00 je 15ce <rb_update_pages+0x1be>
1467: 48 8b 48 08 mov 0x8(%rax),%rcx
146b: 48 8b b3 30 01 00 00 mov 0x130(%rbx),%rsi
1472: 48 83 c8 01 or $0x1,%rax
1476: 48 8b bb 38 01 00 00 mov 0x138(%rbx),%rdi
147d: 48 89 07 mov %rax,(%rdi)
1480: 48 89 4e 08 mov %rcx,0x8(%rsi)
1484: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
1489: 75 be jne 1449 <rb_update_pages+0x39>
148b: 48 89 7a 08 mov %rdi,0x8(%rdx)
148f: 4c 89 e6 mov %r12,%rsi
1492: 48 89 ef mov %rbp,%rdi
1495: 4c 89 ab 30 01 00 00 mov %r13,0x130(%rbx)
149c: 4c 89 ab 38 01 00 00 mov %r13,0x138(%rbx)
14a3: e8 00 00 00 00 call 14a8 <rb_update_pages+0x98>

It's different, but I'm not so sure it's beneficial.

>
> I think that the above examples demonstrate various improvements that
> can be achieved with effectively a one-line, almost mechanical change
> to the code, even in linear code. It would be unfortunate to not
> consider them.

But with gcc 12.2.0 I don't really see the benefit. And I'm worried that
the side effect of modifying the old variable could cause a bug in the
future, if it is used after the try_cmpxchg(). At least for the second case.

-- Steve


2023-03-01 16:28:17

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, 1 Mar 2023 11:18:50 -0500
Steven Rostedt <[email protected]> wrote:

> But with gcc 12.2.0 I don't really see the benefit. And I'm worried that
> the side effect of modifying the old variable could cause a bug in the
> future, if it is used after the try_cmpxchg(). At least for the second case.

Actually, I like Joel's recommendation of adding a cmpxchg_succeeded()
function, that does the try_cmpxchg() without needing to save the old
variable. That's my main concern, as it does have that side effect that
could be missed when updating the code.

That would be the best of both worlds ;-)

-- Steve

2023-03-01 16:36:21

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 1/3] ring_buffer: Change some static functions to void

On Wed, 1 Mar 2023 09:46:50 +0100
Uros Bizjak <[email protected]> wrote:

> On Tue, Feb 28, 2023 at 11:55 PM Masami Hiramatsu <[email protected]> wrote:
> >
> > On Tue, 28 Feb 2023 18:59:27 +0100
> > Uros Bizjak <[email protected]> wrote:
> >
> > > The results of some static functions are not used. Change the
> > > type of these function to void and remove unnecessary returns.
> > >
> > > No functional change intended.
> >
> > NAK, instead of dropping the errors, please handle it on the caller side.
>
> I was under the impression that the intention of these two functions
> is to warn if there is any corruption in data pages detected. Please
> note that the patch has no effect on code size, as the compiler is
> smart enough to drop unused return values by itself. So, the change is
> mostly cosmetic as I was just bothered by unused returns. I'm not
> versed enough in the code to introduce additional error handling, so
> considering its minimal impact, the patch can just be dropped.
>

I'm not against the change.

Masami,

I don't think we need to check the return values, as when these checks
fail, it triggers RB_WARN_ON() which disables the ring buffer involved, and
that should stop further progress of other calls to the affected ring
buffer.

-- Steve


2023-03-01 17:16:23

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, Mar 1, 2023 at 5:18 PM Steven Rostedt <[email protected]> wrote:
>
> On Wed, 1 Mar 2023 10:37:28 +0100
> Uros Bizjak <[email protected]> wrote:
>
> > > No, val should not be updated.
> >
> > Please see the definition of try_cmpxchg. The definition is written in
> > such a way that benefits loops as well as linear code and in the later
> > case depends on the compiler to eliminate assignment to val as a dead
> > assignment.
>
> I did read it ;-)
>
> >
> > The above change was done under the assumption that val is unused
> > after try_cmpxchg, and can be considered as a temporary
> > [Alternatively, the value could be copied to a local temporary and a
> > pointer to this local temporary could be passed to try_cmpxchg
> > instead. Compiler is smart enough to eliminate the assignment in any
> > case.]
> >
> > Even in the linear code, the change has considerable effect.
> > rb_head_page_replace is inlined in rb_get_reader_page and gcc-10.3.1
> > improves code from:
> >
> > ef8: 48 8b 0e mov (%rsi),%rcx
> > efb: 48 83 e1 fc and $0xfffffffffffffffc,%rcx
> > eff: 48 83 c9 01 or $0x1,%rcx
> > f03: 48 89 c8 mov %rcx,%rax
> > f06: f0 48 0f b1 3e lock cmpxchg %rdi,(%rsi)
> > f0b: 48 39 c1 cmp %rax,%rcx
> > f0e: 74 3b je f4b <rb_get_reader_page+0x13b>
> >
> > to:
> >
> > ed8: 48 8b 01 mov (%rcx),%rax
> > edb: 48 83 e0 fc and $0xfffffffffffffffc,%rax
> > edf: 48 83 c8 01 or $0x1,%rax
> > ee3: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
> > ee8: 74 3b je f25 <rb_get_reader_page+0x135>
>
> I'm using gcc 12.2.0 and have this;
>
> cmpxchg:
>
> 0000000000000e50 <rb_get_reader_page>:
> e50: 41 55 push %r13
> e52: 41 54 push %r12
> e54: 55 push %rbp
> e55: 53 push %rbx
> e56: 48 89 fb mov %rdi,%rbx
> e59: 9c pushf
> e5a: 5d pop %rbp
> e5b: fa cli
> e5c: 81 e5 00 02 00 00 and $0x200,%ebp
> e62: 0f 85 e6 01 00 00 jne 104e <rb_get_reader_page+0x1fe>
> e68: 48 8d 7b 1c lea 0x1c(%rbx),%rdi
> e6c: 31 c0 xor %eax,%eax
> e6e: ba 01 00 00 00 mov $0x1,%edx
> e73: f0 0f b1 53 1c lock cmpxchg %edx,0x1c(%rbx)
> e78: 0f 85 e5 01 00 00 jne 1063 <rb_get_reader_page+0x213>
> e7e: 41 bc 03 00 00 00 mov $0x3,%r12d
> e84: 4c 8b 6b 58 mov 0x58(%rbx),%r13
> e88: 49 8b 55 30 mov 0x30(%r13),%rdx
> e8c: 41 8b 45 18 mov 0x18(%r13),%eax
> e90: 48 8b 4a 08 mov 0x8(%rdx),%rcx
> e94: 39 c8 cmp %ecx,%eax
> e96: 0f 82 61 01 00 00 jb ffd <rb_get_reader_page+0x1ad>
> e9c: 48 8b 52 08 mov 0x8(%rdx),%rdx
>
>
> try_cmpxchg:
>
> 0000000000000e70 <rb_get_reader_page>:
> e70: 41 55 push %r13
> e72: 41 54 push %r12
> e74: 55 push %rbp
> e75: 53 push %rbx
> e76: 48 89 fb mov %rdi,%rbx
> e79: 9c pushf
> e7a: 5d pop %rbp
> e7b: fa cli
> e7c: 81 e5 00 02 00 00 and $0x200,%ebp
> e82: 0f 85 e0 01 00 00 jne 1068 <rb_get_reader_page+0x1f8>
> e88: 48 8d 7b 1c lea 0x1c(%rbx),%rdi
> e8c: 31 c0 xor %eax,%eax
> e8e: ba 01 00 00 00 mov $0x1,%edx
> e93: f0 0f b1 53 1c lock cmpxchg %edx,0x1c(%rbx)
> e98: 0f 85 df 01 00 00 jne 107d <rb_get_reader_page+0x20d>
> e9e: 41 bc 03 00 00 00 mov $0x3,%r12d
> ea4: 4c 8b 6b 58 mov 0x58(%rbx),%r13
> ea8: 49 8b 55 30 mov 0x30(%r13),%rdx
> eac: 41 8b 45 18 mov 0x18(%r13),%eax
> eb0: 48 8b 4a 08 mov 0x8(%rdx),%rcx
> eb4: 39 c8 cmp %ecx,%eax
> eb6: 0f 82 5b 01 00 00 jb 1017 <rb_get_reader_page+0x1a7>
> ebc: 48 8b 52 08 mov 0x8(%rdx),%rdx
>
> Which has no difference :-/

This lock cmpxchg belongs to some other locking primitives that were
already converted en masse to try_cmpxchg some time in the past. The
place we are looking for has a compare insn between lock cmpxchg and a
follow-up conditional jump in the original assembly code.

> > Again, even in linear code the change is able to eliminate the
> > assignment to a temporary reg and the compare. Please note that there
> > is no move *from* %rax register after cmpxchg, so the compiler
> > correctly eliminated unused assignment.
> >
> > >
> > > > }
> > > >
> > > > /*
> > > > @@ -2055,7 +2052,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> > > > retries = 10;
> > > > success = false;
> > > > while (retries--) {
> > > > - struct list_head *head_page, *prev_page, *r;
> > > > + struct list_head *head_page, *prev_page;
> > > > struct list_head *last_page, *first_page;
> > > > struct list_head *head_page_with_bit;
> > > >
> > > > @@ -2073,9 +2070,8 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
> > > > last_page->next = head_page_with_bit;
> > > > first_page->prev = prev_page;
> > > >
> > > > - r = cmpxchg(&prev_page->next, head_page_with_bit, first_page);
> > > > -
> > > > - if (r == head_page_with_bit) {
> > > > + if (try_cmpxchg(&prev_page->next,
> > > > + &head_page_with_bit, first_page)) {
> > >
> > > No. head_page_with_bit should not be updated.
> >
> > As above, head_page_with_bit should be considered as a temporary, it
> > is initialized a couple of lines above cmpxchg and unused after. The
> > gcc-10.3.1 compiler even found some more optimization opportunities
> > and reordered the code from:
> >
> > 1364: 4d 8b 86 38 01 00 00 mov 0x138(%r14),%r8
> > 136b: 48 83 ce 01 or $0x1,%rsi
> > 136f: 48 89 f0 mov %rsi,%rax
> > 1372: 49 89 30 mov %rsi,(%r8)
> > 1375: 48 89 4f 08 mov %rcx,0x8(%rdi)
> > 1379: f0 48 0f b1 39 lock cmpxchg %rdi,(%rcx)
> > 137e: 48 39 c6 cmp %rax,%rsi
> > 1381: 74 78 je 13fb <rb_insert_pages+0xdb>
> >
> > to:
> >
> > 1343: 48 83 c8 01 or $0x1,%rax
> > 1347: 48 8b bb 38 01 00 00 mov 0x138(%rbx),%rdi
> > 134e: 48 89 07 mov %rax,(%rdi)
> > 1351: 48 89 4e 08 mov %rcx,0x8(%rsi)
> > 1355: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
> > 135a: 41 0f 94 c7 sete %r15b
> > 135e: 75 2f jne 138f <rb_insert_pages+0x8f>
> >
> > Please also note SETE insn in the above code, this is how the
> > "success" variable is handled in the loop. So, besides code size
> > improvement, other secondary improvements can be expected from the
> > change, too.
>
> For this gcc 12.2.0 did have a different result:
>
> cmpxchg:
>
> 1436: 49 89 c5 mov %rax,%r13
> 1439: eb 41 jmp 147c <rb_update_pages+0x7c>
> 143b: 48 89 df mov %rbx,%rdi
> 143e: e8 bd ed ff ff call 200 <rb_set_head_page>
> 1443: 48 89 c2 mov %rax,%rdx
> 1446: 48 85 c0 test %rax,%rax
> 1449: 74 37 je 1482 <rb_update_pages+0x82>
> 144b: 48 8b 48 08 mov 0x8(%rax),%rcx
> 144f: 48 8b bb 30 01 00 00 mov 0x130(%rbx),%rdi
> 1456: 48 89 c6 mov %rax,%rsi
> 1459: 4c 8b 83 38 01 00 00 mov 0x138(%rbx),%r8
> 1460: 48 83 ce 01 or $0x1,%rsi
> 1464: 48 89 f0 mov %rsi,%rax
> 1467: 49 89 30 mov %rsi,(%r8)
> 146a: 48 89 4f 08 mov %rcx,0x8(%rdi)
> 146e: f0 48 0f b1 39 lock cmpxchg %rdi,(%rcx)
> 1473: 48 39 c6 cmp %rax,%rsi
> 1476: 0f 84 97 01 00 00 je 1613 <rb_update_pages+0x213>
> 147c: 41 83 ee 01 sub $0x1,%r14d
> 1480: 75 b9 jne 143b <rb_update_pages+0x3b>
> 1482: 48 8b 43 10 mov 0x10(%rbx),%rax
> 1486: f0 ff 40 08 lock incl 0x8(%rax)
>
> try_cmpxchg:
>
> 1446: 49 89 c4 mov %rax,%r12
> 1449: 41 83 ee 01 sub $0x1,%r14d
> 144d: 0f 84 7b 01 00 00 je 15ce <rb_update_pages+0x1be>
> 1453: 48 89 df mov %rbx,%rdi
> 1456: e8 c5 ed ff ff call 220 <rb_set_head_page>
> 145b: 48 89 c2 mov %rax,%rdx
> 145e: 48 85 c0 test %rax,%rax
> 1461: 0f 84 67 01 00 00 je 15ce <rb_update_pages+0x1be>
> 1467: 48 8b 48 08 mov 0x8(%rax),%rcx
> 146b: 48 8b b3 30 01 00 00 mov 0x130(%rbx),%rsi
> 1472: 48 83 c8 01 or $0x1,%rax
> 1476: 48 8b bb 38 01 00 00 mov 0x138(%rbx),%rdi
> 147d: 48 89 07 mov %rax,(%rdi)
> 1480: 48 89 4e 08 mov %rcx,0x8(%rsi)
> 1484: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
> 1489: 75 be jne 1449 <rb_update_pages+0x39>
> 148b: 48 89 7a 08 mov %rdi,0x8(%rdx)
> 148f: 4c 89 e6 mov %r12,%rsi
> 1492: 48 89 ef mov %rbp,%rdi
> 1495: 4c 89 ab 30 01 00 00 mov %r13,0x130(%rbx)
> 149c: 4c 89 ab 38 01 00 00 mov %r13,0x138(%rbx)
> 14a3: e8 00 00 00 00 call 14a8 <rb_update_pages+0x98>
>
> It's different, but I'm not so sure it's beneficial.

This is the place we are looking for. Please see that a move at $1464
and a compare at $1473 is missing in the assembly from the patched
code. If it is beneficial ... well, we achieved the same result with
two instructions less in a loopy code.

Uros.

> >
> > I think that the above examples demonstrate various improvements that
> > can be achieved with effectively a one-line, almost mechanical change
> > to the code, even in linear code. It would be unfortunate to not
> > consider them.
>
> But with gcc 12.2.0 I don't really see the benefit. And I'm worried that
> the side effect of modifying the old variable could cause a bug in the
> future, if it is used after the try_cmpxchg(). At least for the second case.
>
> -- Steve
>

2023-03-01 17:57:29

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, Mar 1, 2023 at 5:28 PM Steven Rostedt <[email protected]> wrote:
>
> On Wed, 1 Mar 2023 11:18:50 -0500
> Steven Rostedt <[email protected]> wrote:
>
> > But with gcc 12.2.0 I don't really see the benefit. And I'm worried that
> > the side effect of modifying the old variable could cause a bug in the
> > future, if it is used after the try_cmpxchg(). At least for the second case.
>
> Actually, I like Joel's recommendation of adding a cmpxchg_succeeded()
> function, that does the try_cmpxchg() without needing to save the old
> variable. That's my main concern, as it does have that side effect that
> could be missed when updating the code.

The "controversial" part of the patch would then look like the
attached patch. As expected, the compiler again produces expected
code:

eb8: 48 8b 0e mov (%rsi),%rcx
ebb: 48 83 e1 fc and $0xfffffffffffffffc,%rcx
ebf: 48 83 c9 01 or $0x1,%rcx
ec3: 48 89 c8 mov %rcx,%rax
ec6: f0 48 0f b1 3e lock cmpxchg %rdi,(%rsi)
ecb: 48 39 c1 cmp %rax,%rcx
ece: 74 2d je efd <rb_get_reader_page+0x12d>

to:

eb8: 48 8b 01 mov (%rcx),%rax
ebb: 48 83 e0 fc and $0xfffffffffffffffc,%rax
ebf: 48 83 c8 01 or $0x1,%rax
ec3: f0 48 0f b1 31 lock cmpxchg %rsi,(%rcx)
ec8: 74 2d je ef7 <rb_get_reader_page+0x127>

Uros.


Attachments:
p.diff.txt (1.69 kB)

2023-03-01 18:09:15

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, Mar 1, 2023 at 5:28 PM Steven Rostedt <[email protected]> wrote:
>
> On Wed, 1 Mar 2023 11:18:50 -0500
> Steven Rostedt <[email protected]> wrote:
>
> > But with gcc 12.2.0 I don't really see the benefit. And I'm worried that
> > the side effect of modifying the old variable could cause a bug in the
> > future, if it is used after the try_cmpxchg(). At least for the second case.
>
> Actually, I like Joel's recommendation of adding a cmpxchg_succeeded()
> function, that does the try_cmpxchg() without needing to save the old
> variable. That's my main concern, as it does have that side effect that
> could be missed when updating the code.

I understand your concern regarding updating of head_page_with_bit in
the middle of rb_insert_pages. OTOH, rb_head_page_replace is a small
utility function where update happens in a return clause, so there is
no chance of using val after the try_cmpxchg. If we can ignore the
"updating" issue in rb_head_page_replace, we can simply define
cmpxchg_success in front of rb_insert_pages (now its sole user) and be
done with it.

Uros.

2023-03-01 18:18:41

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, 1 Mar 2023 18:16:04 +0100
Uros Bizjak <[email protected]> wrote:

> > It's different, but I'm not so sure it's beneficial.
>
> This is the place we are looking for. Please see that a move at $1464
> and a compare at $1473 is missing in the assembly from the patched
> code. If it is beneficial ... well, we achieved the same result with
> two instructions less in a loopy code.

Note, none of these locations are in fast paths. (now if we had a
local_try_cmpxchg() then we could improve those locations!).

Thus my main concern here is for correctness. Unfortunately, to add a
cmpxchg_success() would require a lot of work, as all the cmpxchg()
functions are really macros that do a lot of magic (the
include/linux/atomic/atomic-instrumented.h says its even generated!). So
that will likely not happen.

I have mixed feelings for this patch. I like the fact that you are looking
in optimizing the code, but I'm also concerned that it could cause issues
later down the road.

I am leaning to just taking this, and hope that it doesn't cause problems.
And I would really love to change all the local_cmpxchg() to
local_try_cmpxchg(). Hmm, I wonder how much of an effort that would be to
implement those?

-- Steve

2023-03-01 18:28:50

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, 1 Mar 2023 13:18:31 -0500
Steven Rostedt <[email protected]> wrote:

> I am leaning to just taking this, and hope that it doesn't cause problems.
> And I would really love to change all the local_cmpxchg() to
> local_try_cmpxchg(). Hmm, I wonder how much of an effort that would be to
> implement those?

I see that you were the one that added the generic support for
try_cmpxchg64() and messed with all those generated files. Care to add one
for local_try_cmpxchg() ;-)

-- Steve

2023-03-01 18:37:29

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 3/3] ring_buffer: Use try_cmpxchg instead of cmpxchg

On Wed, Mar 1, 2023 at 7:28 PM Steven Rostedt <[email protected]> wrote:
>
> On Wed, 1 Mar 2023 13:18:31 -0500
> Steven Rostedt <[email protected]> wrote:
>
> > I am leaning to just taking this, and hope that it doesn't cause problems.
> > And I would really love to change all the local_cmpxchg() to
> > local_try_cmpxchg(). Hmm, I wonder how much of an effort that would be to
> > implement those?
>
> I see that you were the one that added the generic support for
> try_cmpxchg64() and messed with all those generated files. Care to add one
> for local_try_cmpxchg() ;-)

I already have a half-written patch that implements local_try_cmpxchg.
Half-written in the sense that above-mentioned scripts generate
correct locking primitives, but the fallback code for local_cmpxchg is
a bit different that the fallback code for cmpxchg. There is an effort
to unify all cmpxchg stuff (cmpxchg128 will be introduced) and I think
that local_cmpxchg should also be handled in the same way. So, if
there is interest, I can find some time to finish the infrastructure
patch and convert the uses. But this is quite an undertaking that will
take some time.

Uros.

2023-03-02 23:57:58

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH 1/3] ring_buffer: Change some static functions to void

On Wed, 1 Mar 2023 11:34:16 -0500
Steven Rostedt <[email protected]> wrote:

> On Wed, 1 Mar 2023 09:46:50 +0100
> Uros Bizjak <[email protected]> wrote:
>
> > On Tue, Feb 28, 2023 at 11:55 PM Masami Hiramatsu <[email protected]> wrote:
> > >
> > > On Tue, 28 Feb 2023 18:59:27 +0100
> > > Uros Bizjak <[email protected]> wrote:
> > >
> > > > The results of some static functions are not used. Change the
> > > > type of these function to void and remove unnecessary returns.
> > > >
> > > > No functional change intended.
> > >
> > > NAK, instead of dropping the errors, please handle it on the caller side.
> >
> > I was under the impression that the intention of these two functions
> > is to warn if there is any corruption in data pages detected. Please
> > note that the patch has no effect on code size, as the compiler is
> > smart enough to drop unused return values by itself. So, the change is
> > mostly cosmetic as I was just bothered by unused returns. I'm not
> > versed enough in the code to introduce additional error handling, so
> > considering its minimal impact, the patch can just be dropped.
> >
>
> I'm not against the change.
>
> Masami,
>
> I don't think we need to check the return values, as when these checks
> fail, it triggers RB_WARN_ON() which disables the ring buffer involved, and
> that should stop further progress of other calls to the affected ring
> buffer.

Ah, so the RB_WARN_ON() has a side effect which stops all further operations.
OK, I got it.

Reviewed-by: Masami Hiramatsu (Google) <[email protected]>

>
> -- Steve
>


--
Masami Hiramatsu (Google) <[email protected]>