2023-01-03 06:48:01

by Hongchen Zhang

[permalink] [raw]
Subject: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock

Use spinlock in pipe_read/write cost too much time,IMO
pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
On the other hand, we can use __pipe_lock/unlock to protect the
pipe->head/tail in pipe_resize_ring and post_one_notification.

Signed-off-by: Hongchen Zhang <[email protected]>
---
fs/pipe.c | 24 ++++--------------------
include/linux/pipe_fs_i.h | 12 ++++++++++++
kernel/watch_queue.c | 8 ++++----
3 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..cf449779bf71 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
}
EXPORT_SYMBOL(pipe_unlock);

-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
- mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
- mutex_unlock(&pipe->mutex);
-}
-
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
@@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
*/
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
for (;;) {
- /* Read ->head with a barrier vs post_one_notification() */
- unsigned int head = smp_load_acquire(&pipe->head);
+ unsigned int head = pipe->head;
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;

@@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)

if (!buf->len) {
pipe_buf_release(pipe, buf);
- spin_lock_irq(&pipe->rd_wait.lock);
#ifdef CONFIG_WATCH_QUEUE
if (buf->flags & PIPE_BUF_FLAG_LOSS)
pipe->note_loss = true;
#endif
tail++;
pipe->tail = tail;
- spin_unlock_irq(&pipe->rd_wait.lock);
}
total_len -= chars;
if (!total_len)
@@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* it, either the reader will consume it or it'll still
* be there for the next write.
*/
- spin_lock_irq(&pipe->rd_wait.lock);

head = pipe->head;
if (pipe_full(head, pipe->tail, pipe->max_usage)) {
- spin_unlock_irq(&pipe->rd_wait.lock);
continue;
}

pipe->head = head + 1;
- spin_unlock_irq(&pipe->rd_wait.lock);

/* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
@@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
if (unlikely(!bufs))
return -ENOMEM;

- spin_lock_irq(&pipe->rd_wait.lock);
+ __pipe_lock(pipe);
mask = pipe->ring_size - 1;
head = pipe->head;
tail = pipe->tail;

n = pipe_occupancy(head, tail);
if (nr_slots < n) {
- spin_unlock_irq(&pipe->rd_wait.lock);
+ __pipe_unlock(pipe);
kfree(bufs);
return -EBUSY;
}
@@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
pipe->tail = tail;
pipe->head = head;

- spin_unlock_irq(&pipe->rd_wait.lock);
+ __pipe_unlock(pipe);

/* This might have made more room for writers */
wake_up_interruptible(&pipe->wr_wait);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 6cb65df3e3ba..f5084daf6eaf 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -2,6 +2,8 @@
#ifndef _LINUX_PIPE_FS_I_H
#define _LINUX_PIPE_FS_I_H

+#include <linux/fs.h>
+
#define PIPE_DEF_BUFFERS 16

#define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */
@@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
#define PIPE_SIZE PAGE_SIZE

/* Pipe lock and unlock operations */
+static inline void __pipe_lock(struct pipe_inode_info *pipe)
+{
+ mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
+static inline void __pipe_unlock(struct pipe_inode_info *pipe)
+{
+ mutex_unlock(&pipe->mutex);
+}
+
void pipe_lock(struct pipe_inode_info *);
void pipe_unlock(struct pipe_inode_info *);
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
index a6f9bdd956c3..92e46cfe9419 100644
--- a/kernel/watch_queue.c
+++ b/kernel/watch_queue.c
@@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
if (!pipe)
return false;

- spin_lock_irq(&pipe->rd_wait.lock);
+ __pipe_lock(pipe);

mask = pipe->ring_size - 1;
head = pipe->head;
@@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
buf->offset = offset;
buf->len = len;
buf->flags = PIPE_BUF_FLAG_WHOLE;
- smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
+ pipe->head = head + 1;

if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
- spin_unlock_irq(&pipe->rd_wait.lock);
+ __pipe_unlock(pipe);
BUG();
}
wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
done = true;

out:
- spin_unlock_irq(&pipe->rd_wait.lock);
+ __pipe_unlock(pipe);
if (done)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
return done;

base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
--
2.31.1


2023-01-03 18:30:43

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock

On Tue, Jan 03, 2023 at 02:33:03PM +0800, Hongchen Zhang wrote:
> Use spinlock in pipe_read/write cost too much time,IMO

Everybody has an opinion. Do you have data?

> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> On the other hand, we can use __pipe_lock/unlock to protect the
> pipe->head/tail in pipe_resize_ring and post_one_notification.
>
> Signed-off-by: Hongchen Zhang <[email protected]>
> ---

you're supposed to write here what changes you made between v1 and v2.

> fs/pipe.c | 24 ++++--------------------
> include/linux/pipe_fs_i.h | 12 ++++++++++++
> kernel/watch_queue.c | 8 ++++----
> 3 files changed, 20 insertions(+), 24 deletions(-)
>
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 42c7ff41c2db..cf449779bf71 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> }
> EXPORT_SYMBOL(pipe_unlock);
>
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> - mutex_unlock(&pipe->mutex);
> -}
> -
> void pipe_double_lock(struct pipe_inode_info *pipe1,
> struct pipe_inode_info *pipe2)
> {
> @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> */
> was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> for (;;) {
> - /* Read ->head with a barrier vs post_one_notification() */
> - unsigned int head = smp_load_acquire(&pipe->head);
> + unsigned int head = pipe->head;
> unsigned int tail = pipe->tail;
> unsigned int mask = pipe->ring_size - 1;
>
> @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>
> if (!buf->len) {
> pipe_buf_release(pipe, buf);
> - spin_lock_irq(&pipe->rd_wait.lock);
> #ifdef CONFIG_WATCH_QUEUE
> if (buf->flags & PIPE_BUF_FLAG_LOSS)
> pipe->note_loss = true;
> #endif
> tail++;
> pipe->tail = tail;
> - spin_unlock_irq(&pipe->rd_wait.lock);
> }
> total_len -= chars;
> if (!total_len)
> @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> * it, either the reader will consume it or it'll still
> * be there for the next write.
> */
> - spin_lock_irq(&pipe->rd_wait.lock);
>
> head = pipe->head;
> if (pipe_full(head, pipe->tail, pipe->max_usage)) {
> - spin_unlock_irq(&pipe->rd_wait.lock);
> continue;
> }
>
> pipe->head = head + 1;
> - spin_unlock_irq(&pipe->rd_wait.lock);
>
> /* Insert it into the buffer array */
> buf = &pipe->bufs[head & mask];
> @@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
> if (unlikely(!bufs))
> return -ENOMEM;
>
> - spin_lock_irq(&pipe->rd_wait.lock);
> + __pipe_lock(pipe);
> mask = pipe->ring_size - 1;
> head = pipe->head;
> tail = pipe->tail;
>
> n = pipe_occupancy(head, tail);
> if (nr_slots < n) {
> - spin_unlock_irq(&pipe->rd_wait.lock);
> + __pipe_unlock(pipe);
> kfree(bufs);
> return -EBUSY;
> }
> @@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
> pipe->tail = tail;
> pipe->head = head;
>
> - spin_unlock_irq(&pipe->rd_wait.lock);
> + __pipe_unlock(pipe);
>
> /* This might have made more room for writers */
> wake_up_interruptible(&pipe->wr_wait);
> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> index 6cb65df3e3ba..f5084daf6eaf 100644
> --- a/include/linux/pipe_fs_i.h
> +++ b/include/linux/pipe_fs_i.h
> @@ -2,6 +2,8 @@
> #ifndef _LINUX_PIPE_FS_I_H
> #define _LINUX_PIPE_FS_I_H
>
> +#include <linux/fs.h>
> +
> #define PIPE_DEF_BUFFERS 16
>
> #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */
> @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
> #define PIPE_SIZE PAGE_SIZE
>
> /* Pipe lock and unlock operations */
> +static inline void __pipe_lock(struct pipe_inode_info *pipe)
> +{
> + mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> +}
> +
> +static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> +{
> + mutex_unlock(&pipe->mutex);
> +}
> +
> void pipe_lock(struct pipe_inode_info *);
> void pipe_unlock(struct pipe_inode_info *);
> void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
> index a6f9bdd956c3..92e46cfe9419 100644
> --- a/kernel/watch_queue.c
> +++ b/kernel/watch_queue.c
> @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
> if (!pipe)
> return false;
>
> - spin_lock_irq(&pipe->rd_wait.lock);
> + __pipe_lock(pipe);
>
> mask = pipe->ring_size - 1;
> head = pipe->head;
> @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
> buf->offset = offset;
> buf->len = len;
> buf->flags = PIPE_BUF_FLAG_WHOLE;
> - smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
> + pipe->head = head + 1;
>
> if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
> - spin_unlock_irq(&pipe->rd_wait.lock);
> + __pipe_unlock(pipe);
> BUG();
> }
> wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> done = true;
>
> out:
> - spin_unlock_irq(&pipe->rd_wait.lock);
> + __pipe_unlock(pipe);
> if (done)
> kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> return done;
>
> base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
> --
> 2.31.1
>

2023-01-04 04:24:32

by Hongchen Zhang

[permalink] [raw]
Subject: Re: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock

Hi Matthew,

On 2023/1/4 am 1:51, Matthew Wilcox wrote:
> On Tue, Jan 03, 2023 at 02:33:03PM +0800, Hongchen Zhang wrote:
>> Use spinlock in pipe_read/write cost too much time,IMO
>
> Everybody has an opinion. Do you have data?
>
I tested this patch using UnixBench's pipe test case on a x86_64
machine,and get the following data:
1) before this patch
System Benchmarks Partial Index BASELINE RESULT INDEX
Pipe Throughput 12440.0 493023.3 396.3
========
System Benchmarks Index Score (Partial Only) 396.3

2) after this patch
System Benchmarks Partial Index BASELINE RESULT INDEX
Pipe Throughput 12440.0 507551.4 408.0
========
System Benchmarks Index Score (Partial Only) 408.0

so we get ~3% speedup.

>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>> On the other hand, we can use __pipe_lock/unlock to protect the
>> pipe->head/tail in pipe_resize_ring and post_one_notification.
>>
>> Signed-off-by: Hongchen Zhang <[email protected]>
>> ---
>
> you're supposed to write here what changes you made between v1 and v2.
>
I added the linux/fs.h in v2 to fix the linux-test-robot test error in v1.
>> fs/pipe.c | 24 ++++--------------------
>> include/linux/pipe_fs_i.h | 12 ++++++++++++
>> kernel/watch_queue.c | 8 ++++----
>> 3 files changed, 20 insertions(+), 24 deletions(-)
>>
>> diff --git a/fs/pipe.c b/fs/pipe.c
>> index 42c7ff41c2db..cf449779bf71 100644
>> --- a/fs/pipe.c
>> +++ b/fs/pipe.c
>> @@ -98,16 +98,6 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>> }
>> EXPORT_SYMBOL(pipe_unlock);
>>
>> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
>> -{
>> - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
>> -}
>> -
>> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
>> -{
>> - mutex_unlock(&pipe->mutex);
>> -}
>> -
>> void pipe_double_lock(struct pipe_inode_info *pipe1,
>> struct pipe_inode_info *pipe2)
>> {
>> @@ -253,8 +243,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>> */
>> was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>> for (;;) {
>> - /* Read ->head with a barrier vs post_one_notification() */
>> - unsigned int head = smp_load_acquire(&pipe->head);
>> + unsigned int head = pipe->head;
>> unsigned int tail = pipe->tail;
>> unsigned int mask = pipe->ring_size - 1;
>>
>> @@ -322,14 +311,12 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>>
>> if (!buf->len) {
>> pipe_buf_release(pipe, buf);
>> - spin_lock_irq(&pipe->rd_wait.lock);
>> #ifdef CONFIG_WATCH_QUEUE
>> if (buf->flags & PIPE_BUF_FLAG_LOSS)
>> pipe->note_loss = true;
>> #endif
>> tail++;
>> pipe->tail = tail;
>> - spin_unlock_irq(&pipe->rd_wait.lock);
>> }
>> total_len -= chars;
>> if (!total_len)
>> @@ -506,16 +493,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>> * it, either the reader will consume it or it'll still
>> * be there for the next write.
>> */
>> - spin_lock_irq(&pipe->rd_wait.lock);
>>
>> head = pipe->head;
>> if (pipe_full(head, pipe->tail, pipe->max_usage)) {
>> - spin_unlock_irq(&pipe->rd_wait.lock);
>> continue;
>> }
>>
>> pipe->head = head + 1;
>> - spin_unlock_irq(&pipe->rd_wait.lock);
>>
>> /* Insert it into the buffer array */
>> buf = &pipe->bufs[head & mask];
>> @@ -1260,14 +1244,14 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>> if (unlikely(!bufs))
>> return -ENOMEM;
>>
>> - spin_lock_irq(&pipe->rd_wait.lock);
>> + __pipe_lock(pipe);
>> mask = pipe->ring_size - 1;
>> head = pipe->head;
>> tail = pipe->tail;
>>
>> n = pipe_occupancy(head, tail);
>> if (nr_slots < n) {
>> - spin_unlock_irq(&pipe->rd_wait.lock);
>> + __pipe_unlock(pipe);
>> kfree(bufs);
>> return -EBUSY;
>> }
>> @@ -1303,7 +1287,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
>> pipe->tail = tail;
>> pipe->head = head;
>>
>> - spin_unlock_irq(&pipe->rd_wait.lock);
>> + __pipe_unlock(pipe);
>>
>> /* This might have made more room for writers */
>> wake_up_interruptible(&pipe->wr_wait);
>> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
>> index 6cb65df3e3ba..f5084daf6eaf 100644
>> --- a/include/linux/pipe_fs_i.h
>> +++ b/include/linux/pipe_fs_i.h
>> @@ -2,6 +2,8 @@
>> #ifndef _LINUX_PIPE_FS_I_H
>> #define _LINUX_PIPE_FS_I_H
>>
>> +#include <linux/fs.h>
>> +
>> #define PIPE_DEF_BUFFERS 16
>>
>> #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */
>> @@ -223,6 +225,16 @@ static inline void pipe_discard_from(struct pipe_inode_info *pipe,
>> #define PIPE_SIZE PAGE_SIZE
>>
>> /* Pipe lock and unlock operations */
>> +static inline void __pipe_lock(struct pipe_inode_info *pipe)
>> +{
>> + mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
>> +}
>> +
>> +static inline void __pipe_unlock(struct pipe_inode_info *pipe)
>> +{
>> + mutex_unlock(&pipe->mutex);
>> +}
>> +
>> void pipe_lock(struct pipe_inode_info *);
>> void pipe_unlock(struct pipe_inode_info *);
>> void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
>> diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
>> index a6f9bdd956c3..92e46cfe9419 100644
>> --- a/kernel/watch_queue.c
>> +++ b/kernel/watch_queue.c
>> @@ -108,7 +108,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
>> if (!pipe)
>> return false;
>>
>> - spin_lock_irq(&pipe->rd_wait.lock);
>> + __pipe_lock(pipe);
>>
>> mask = pipe->ring_size - 1;
>> head = pipe->head;
>> @@ -135,17 +135,17 @@ static bool post_one_notification(struct watch_queue *wqueue,
>> buf->offset = offset;
>> buf->len = len;
>> buf->flags = PIPE_BUF_FLAG_WHOLE;
>> - smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */
>> + pipe->head = head + 1;
>>
>> if (!test_and_clear_bit(note, wqueue->notes_bitmap)) {
>> - spin_unlock_irq(&pipe->rd_wait.lock);
>> + __pipe_unlock(pipe);
>> BUG();
>> }
>> wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>> done = true;
>>
>> out:
>> - spin_unlock_irq(&pipe->rd_wait.lock);
>> + __pipe_unlock(pipe);
>> if (done)
>> kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>> return done;
>>
>> base-commit: c8451c141e07a8d05693f6c8d0e418fbb4b68bb7
>> --
>> 2.31.1
>>

2023-01-06 03:08:34

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock

On Tue, 3 Jan 2023 14:33:03 +0800 Hongchen Zhang <[email protected]> wrote:

> Use spinlock in pipe_read/write cost too much time,IMO
> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
> On the other hand, we can use __pipe_lock/unlock to protect the
> pipe->head/tail in pipe_resize_ring and post_one_notification.

Can you please test this with the test code in Linus's 0ddad21d3e99 and
check that performance is good?

2023-01-06 07:18:45

by Oliver Sang

[permalink] [raw]
Subject: Re: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock


Greeting,

FYI, we noticed WARNING:possible_recursive_locking_detected due to commit (built with gcc-11):

commit: 2afced4b77a399b14eb2e2797968228d7ce69a2a ("[PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock")
url: https://github.com/intel-lab-lkp/linux/commits/Hongchen-Zhang/pipe-use-__pipe_-lock-unlock-instead-of-spinlock/20230103-143459
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock

in testcase: trinity
version: trinity-static-x86_64-x86_64-1c734c75-1_2020-01-06
with following parameters:

runtime: 300s

test-description: Trinity is a linux system call fuzz tester.
test-url: http://codemonkey.org.uk/projects/trinity/


on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


If you fix the issue, kindly add following tag
| Reported-by: kernel test robot <[email protected]>
| Link: https://lore.kernel.org/oe-lkp/[email protected]


[ 493.585155][ T1930] Unable to find swap-space signature
[ 508.410154][ T1930]
[ 508.410930][ T1930] ============================================
[ 508.412135][ T1930] WARNING: possible recursive locking detected
[ 508.413313][ T1930] 6.2.0-rc1-00085-g2afced4b77a3 #14 Not tainted
[ 508.414545][ T1930] --------------------------------------------
[ 508.415735][ T1930] trinity-c2/1930 is trying to acquire lock:
[ 508.416905][ T1930] ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_resize_ring (??:?)
[ 508.418698][ T1930]
[ 508.418698][ T1930] but task is already holding lock:
[ 508.420086][ T1930] ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_fcntl (??:?)
[ 508.421781][ T1930]
[ 508.421781][ T1930] other info that might help us debug this:
[ 508.423268][ T1930] Possible unsafe locking scenario:
[ 508.423268][ T1930]
[ 508.424688][ T1930] CPU0
[ 508.425410][ T1930] ----
[ 508.426105][ T1930] lock(&pipe->mutex/1);
[ 508.426972][ T1930] lock(&pipe->mutex/1);
[ 508.427840][ T1930]
[ 508.427840][ T1930] *** DEADLOCK ***
[ 508.427840][ T1930]
[ 508.429476][ T1930] May be due to missing lock nesting notation
[ 508.429476][ T1930]
[ 508.430981][ T1930] 1 lock held by trinity-c2/1930:
[ 508.431945][ T1930] #0: ffff8881641d3068 (&pipe->mutex/1){+.+.}-{3:3}, at: pipe_fcntl (??:?)
[ 508.433663][ T1930]
[ 508.433663][ T1930] stack backtrace:
[ 508.434847][ T1930] CPU: 1 PID: 1930 Comm: trinity-c2 Not tainted 6.2.0-rc1-00085-g2afced4b77a3 #14 b8d9e225d32aed8adc2a69ef5f115031b187ce0c
[ 508.436917][ T1930] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014
[ 508.438696][ T1930] Call Trace:
[ 508.439402][ T1930] <TASK>
[ 508.440075][ T1930] dump_stack_lvl (??:?)
[ 508.440972][ T1930] validate_chain.cold (lockdep.c:?)
[ 508.441972][ T1930] ? check_prev_add (lockdep.c:?)
[ 508.442911][ T1930] ? mark_held_locks (lockdep.c:?)
[ 508.443824][ T1930] __lock_acquire (lockdep.c:?)
[ 508.444714][ T1930] lock_acquire (??:?)
[ 508.445575][ T1930] ? pipe_resize_ring (??:?)
[ 508.446497][ T1930] ? rcu_read_unlock (main.c:?)
[ 508.447403][ T1930] ? entry_SYSCALL_64_after_hwframe (??:?)
[ 508.448483][ T1930] __mutex_lock (mutex.c:?)
[ 508.449361][ T1930] ? pipe_resize_ring (??:?)
[ 508.450248][ T1930] ? kasan_quarantine_reduce (??:?)
[ 508.451257][ T1930] ? lock_downgrade (lockdep.c:?)
[ 508.452161][ T1930] ? pipe_resize_ring (??:?)
[ 508.453053][ T1930] ? mark_held_locks (lockdep.c:?)
[ 508.453935][ T1930] ? mutex_lock_io_nested (mutex.c:?)
[ 508.454925][ T1930] ? kasan_quarantine_reduce (??:?)
[ 508.455990][ T1930] ? pipe_resize_ring (??:?)
[ 508.456904][ T1930] ? pipe_resize_ring (??:?)
[ 508.457862][ T1930] ? pipe_resize_ring (??:?)
[ 508.458791][ T1930] pipe_resize_ring (??:?)
[ 508.459702][ T1930] pipe_fcntl (??:?)
[ 508.460537][ T1930] ? find_held_lock (lockdep.c:?)
[ 508.461435][ T1930] do_fcntl (fcntl.c:?)
[ 508.462220][ T1930] ? __task_pid_nr_ns (??:?)
[ 508.463185][ T1930] ? f_getown (fcntl.c:?)
[ 508.464058][ T1930] ? __x64_sys_alarm (??:?)
[ 508.464959][ T1930] __x64_sys_fcntl (??:?)
[ 508.465869][ T1930] do_syscall_64 (??:?)
[ 508.466745][ T1930] entry_SYSCALL_64_after_hwframe (??:?)
[ 508.467721][ T1930] RIP: 0033:0x463519
[ 508.468493][ T1930] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 59 00 00 c3 66 2e 0f 1f 84 00 00 00 00
All code
========
0: 00 f3 add %dh,%bl
2: c3 retq
3: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
a: 00 00 00
d: 0f 1f 40 00 nopl 0x0(%rax)
11: 48 89 f8 mov %rdi,%rax
14: 48 89 f7 mov %rsi,%rdi
17: 48 89 d6 mov %rdx,%rsi
1a: 48 89 ca mov %rcx,%rdx
1d: 4d 89 c2 mov %r8,%r10
20: 4d 89 c8 mov %r9,%r8
23: 4c 8b 4c 24 08 mov 0x8(%rsp),%r9
28: 0f 05 syscall
2a:* 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax <-- trapping instruction
30: 0f 83 db 59 00 00 jae 0x5a11
36: c3 retq
37: 66 data16
38: 2e cs
39: 0f .byte 0xf
3a: 1f (bad)
3b: 84 00 test %al,(%rax)
3d: 00 00 add %al,(%rax)
...

Code starting with the faulting instruction
===========================================
0: 48 3d 01 f0 ff ff cmp $0xfffffffffffff001,%rax
6: 0f 83 db 59 00 00 jae 0x59e7
c: c3 retq
d: 66 data16
e: 2e cs
f: 0f .byte 0xf
10: 1f (bad)
11: 84 00 test %al,(%rax)
13: 00 00 add %al,(%rax)
...
[ 508.471663][ T1930] RSP: 002b:00007ffd6a2fa2c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000048
[ 508.473176][ T1930] RAX: ffffffffffffffda RBX: 0000000000000048 RCX: 0000000000463519
[ 508.474574][ T1930] RDX: 0000000000000066 RSI: 0000000000000407 RDI: 000000000000013e
[ 508.476026][ T1930] RBP: 00007f36cc433000 R08: fffffffffffffff8 R09: 000000000000000f
[ 508.477459][ T1930] R10: 0000000010000000 R11: 0000000000000246 R12: 0000000000000002
[ 508.478870][ T1930] R13: 00007f36cc433058 R14: 0000000002241850 R15: 00007f36cc433000
[ 508.482573][ T1930] </TASK>
[ 510.837665][ T2080] Unable to find swap-space signature
[ 518.245623][ T2080] futex_wake_op: trinity-c1 tries to shift op by -1; fix this program
[ 639.655156][ T298] hwclock: can't open '/dev/misc/rtc': No such file or directory
LKP: ttyS0: 273: LKP: rebooting forcely
[ 651.988559][ T273] sysrq: Emergency Sync
[ 651.989495][ T29] Emergency Sync complete
[ 651.990747][ T273] sysrq: Resetting



To reproduce:

# build kernel
cd linux
cp config-6.2.0-rc1-00085-g2afced4b77a3 .config
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 olddefconfig prepare modules_prepare bzImage modules
make HOSTCC=gcc-11 CC=gcc-11 ARCH=x86_64 INSTALL_MOD_PATH=<mod-install-dir> modules_install
cd <mod-install-dir>
find lib/ | cpio -o -H newc --quiet | gzip > modules.cgz


git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> -m modules.cgz job-script # job-script is attached in this email

# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.



--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests



Attachments:
(No filename) (7.80 kB)
config-6.2.0-rc1-00085-g2afced4b77a3 (152.54 kB)
job-script (4.61 kB)
dmesg.xz (37.20 kB)
trinity (12.10 kB)
Download all attachments

2023-01-06 08:08:56

by Hongchen Zhang

[permalink] [raw]
Subject: Re: [PATCH v2] pipe: use __pipe_{lock,unlock} instead of spinlock

Hi Andrew,
On 2023/1/6 am 10:59, Andrew Morton wrote:
> On Tue, 3 Jan 2023 14:33:03 +0800 Hongchen Zhang <[email protected]> wrote:
>
>> Use spinlock in pipe_read/write cost too much time,IMO
>> pipe->{head,tail} can be protected by __pipe_{lock,unlock}.
>> On the other hand, we can use __pipe_lock/unlock to protect the
>> pipe->head/tail in pipe_resize_ring and post_one_notification.
>
> Can you please test this with the test code in Linus's 0ddad21d3e99 and
> check that performance is good?
>
I tested with the test code in Linus's 0ddad21d3e99,and get the
following result:

1) before this patch
13,136.54 msec task-clock # 3.870 CPUs utilized

1,186,779 context-switches # 90.342 K/sec

668,867 cpu-migrations # 50.917 K/sec

895 page-faults # 68.131 /sec

29,875,711,543 cycles # 2.274 GHz

12,372,397,462 instructions # 0.41 insn per cycle

2,480,235,723 branches # 188.804 M/sec

47,191,943 branch-misses # 1.90% of all branches


3.394806886 seconds time elapsed

0.037869000 seconds user
0.189346000 seconds sys

2) after this patch

12,395.63 msec task-clock # 4.138 CPUs utilized

1,193,381 context-switches # 96.274 K/sec

585,543 cpu-migrations # 47.238 K/sec

1,063 page-faults # 85.756 /sec

27,691,587,226 cycles # 2.234 GHz

11,738,307,999 instructions # 0.42 insn per cycle

2,351,299,522 branches # 189.688 M/sec

45,404,526 branch-misses # 1.93% of all branches


2.995280878 seconds time elapsed

0.010615000 seconds user
0.206999000 seconds sys
After adding this patch, the time used on this test program becomes less.

Another thing, because of my carelessness, trinity tool tested a bug, I
will remove the unnecessary __pipe_{lock,unlock} in pipe_resize_ring in
v3, because the lock is owned in pipe_fcntl/pipe_ioctl.

Thanks.