2024-01-27 02:01:35

by Kent Overstreet

Subject: [PATCH 0/4] lockdep cmp fn conversions

Rationale:
*_lock_nested() is fundamentally broken: for lockdep to work, it needs
to be able to check that we're following some defined ordering, and
it's not possible to define a total ordering over an arbitrary number
of objects with only a small, fixed-size enum of subclasses.

So it needs to go. A while back I added the ability to set a comparison
function for a lock class; this is the start of a hopefully slow but
steady trickle of patches, as time allows, to convert code to use it.
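
For reference, a minimal sketch of what a conversion looks like.
struct foo, foo_init() and foo_lock_two() are made up for illustration;
cmp_int(), lock_set_cmp_fn() and the lockdep_map arguments are as used
in the patches below:

  #define cmp_int(l, r) ((l > r) - (l < r))	/* as in fs/pipe.c below */

  /* Any total order works; ordering by address is the common case. */
  static int foo_lock_cmp_fn(const struct lockdep_map *a,
			     const struct lockdep_map *b)
  {
	return cmp_int((unsigned long) a, (unsigned long) b);
  }

  static void foo_init(struct foo *f)
  {
	mutex_init(&f->lock);
	lock_set_cmp_fn(&f->lock, foo_lock_cmp_fn, NULL); /* NULL: no print fn */
  }

  /*
   * Callers taking two locks at once then lock in the order the cmp fn
   * defines; the embedded dep_maps order the same way as the structs.
   */
  static void foo_lock_two(struct foo *a, struct foo *b)
  {
	if (a > b)
		swap(a, b);
	mutex_lock(&a->lock);
	mutex_lock(&b->lock);
  }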

Kent Overstreet (4):
fs/pipe: Convert to lockdep_cmp_fn
pktcdvd: kill mutex_lock_nested() usage
net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()
af_unix: convert to lock_cmp_fn

 drivers/block/pktcdvd.c  |  8 ++---
 fs/pipe.c                | 77 ++++++++++++++++------------------------
 include/linux/lockdep.h  |  3 ++
 include/net/af_unix.h    |  3 --
 kernel/locking/lockdep.c |  6 ++++
 net/core/sock.c          |  1 +
 net/unix/af_unix.c       | 24 ++++++-------
 net/unix/diag.c          |  2 +-
8 files changed, 55 insertions(+), 69 deletions(-)

--
2.43.0



2024-01-27 02:01:57

by Kent Overstreet

Subject: [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()

Cc: [email protected]
Signed-off-by: Kent Overstreet <[email protected]>
---
 net/core/sock.c    | 1 +
 net/unix/af_unix.c | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..da7360c0f454 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3474,6 +3474,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
spin_lock_init(&sk->sk_peer_lock);
+ lock_set_cmp_fn_ptr_order(&sk->sk_peer_lock);

sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ac1f2bc18fc9..d013de3c5490 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -706,10 +706,10 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)

if (sk < peersk) {
spin_lock(&sk->sk_peer_lock);
- spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ spin_lock(&peersk->sk_peer_lock);
} else {
spin_lock(&peersk->sk_peer_lock);
- spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ spin_lock(&sk->sk_peer_lock);
}
old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred;
--
2.43.0


2024-01-27 02:02:00

by Kent Overstreet

Subject: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

*_lock_nested() is fundamentally broken; lockdep needs to check lock
ordering, but we cannot devise a total ordering on an unbounded number
of elements with only a few subclasses.

The replacement is to define lock ordering with a proper comparison
function.

fs/pipe.c was already doing everything correctly otherwise; nothing
much changes here.

Cc: Alexander Viro <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: Jan Kara <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
---
fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index f1adbfe743d4..50c8a8596b52 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
* -- Manfred Spraul <[email protected]> 2002-05-09
*/

-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r) ((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
{
- if (pipe->files)
- mutex_lock_nested(&pipe->mutex, subclass);
+ return cmp_int((unsigned long) a, (unsigned long) b);
}
+#endif

void pipe_lock(struct pipe_inode_info *pipe)
{
- /*
- * pipe_lock() nests non-pipe inode locks (for writing to a file)
- */
- pipe_lock_nested(pipe, I_MUTEX_PARENT);
+ if (pipe->files)
+ mutex_lock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_lock);

@@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
}
EXPORT_SYMBOL(pipe_unlock);

-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
- mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
- mutex_unlock(&pipe->mutex);
-}
-
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
BUG_ON(pipe1 == pipe2);

- if (pipe1 < pipe2) {
- pipe_lock_nested(pipe1, I_MUTEX_PARENT);
- pipe_lock_nested(pipe2, I_MUTEX_CHILD);
- } else {
- pipe_lock_nested(pipe2, I_MUTEX_PARENT);
- pipe_lock_nested(pipe1, I_MUTEX_CHILD);
- }
+ if (pipe1 > pipe2)
+ swap(pipe1, pipe2);
+
+ pipe_lock(pipe1);
+ pipe_lock(pipe2);
}

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
return 0;

ret = 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

/*
* We only wake up writers if the pipe was full when we started
@@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
ret = -EAGAIN;
break;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

/*
* We only get here if we didn't actually read anything.
@@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
return -ERESTARTSYS;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
wake_next_reader = true;
}
if (pipe_empty(pipe->head, pipe->tail))
wake_next_reader = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

if (was_full)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
if (unlikely(total_len == 0))
return 0;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* after waiting we need to re-check whether the pipe
* become empty while we dropped the lock.
*/
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
if (was_empty)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_empty = pipe_empty(pipe->head, pipe->tail);
wake_next_writer = true;
}
out:
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
wake_next_writer = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

/*
* If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)

switch (cmd) {
case FIONREAD:
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
count = 0;
head = pipe->head;
tail = pipe->tail;
@@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
count += pipe->bufs[tail & mask].len;
tail++;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

return put_user(count, (int __user *)arg);

#ifdef CONFIG_WATCH_QUEUE
case IOC_WATCH_QUEUE_SET_SIZE: {
int ret;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
ret = watch_queue_set_size(pipe, arg);
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}

@@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
{
struct pipe_inode_info *pipe = file->private_data;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (file->f_mode & FMODE_READ)
pipe->readers--;
if (file->f_mode & FMODE_WRITE)
@@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

put_pipe_info(inode, pipe);
return 0;
@@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
struct pipe_inode_info *pipe = filp->private_data;
int retval = 0;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (filp->f_mode & FMODE_READ)
retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
/* this can happen only if on == T */
fasync_helper(-1, filp, 0, &pipe->fasync_readers);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return retval;
}

@@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
+ lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
return pipe;
}

@@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
filp->private_data = pipe;
/* OK, we have a pipe and it's pinned down */

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

/* We can only do regular read/write on fifos */
stream_open(inode, filp);
@@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
}

/* Ok! */
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return 0;

err_rd:
@@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
goto err;

err:
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

put_pipe_info(inode, pipe);
return ret;
@@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
if (!pipe)
return -EBADF;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

switch (cmd) {
case F_SETPIPE_SZ:
@@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
break;
}

- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}

--
2.43.0


2024-01-27 02:02:32

by Kent Overstreet

Subject: [PATCH 4/4] af_unix: convert to lock_cmp_fn

Kill
- unix_state_lock_nested()
- _nested usage for net->unx.table.locks[]

and replace both with lock_set_cmp_fn_ptr_order().

The lock ordering in sk_diag_dump_icons() looks suspicious; this may
turn up a real issue.

Cc: [email protected]
Signed-off-by: Kent Overstreet <[email protected]>
---
 include/net/af_unix.h | 3 ---
 net/unix/af_unix.c    | 20 ++++++++------------
 net/unix/diag.c       | 2 +-
3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 49c4640027d8..4eff0a089640 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -48,9 +48,6 @@ struct scm_stat {

#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
- spin_lock_nested(&unix_sk(s)->lock, \
- SINGLE_DEPTH_NESTING)

/* The AF_UNIX socket */
struct unix_sock {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d013de3c5490..1a0d273799c1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
swap(hash1, hash2);

spin_lock(&net->unx.table.locks[hash1]);
- spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
+ spin_lock(&net->unx.table.locks[hash2]);
}

static void unix_table_double_unlock(struct net *net,
@@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
+ lock_set_cmp_fn_ptr_order(&u->lock);
atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
@@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
- if (unlikely(sk1 == sk2) || !sk2) {
- unix_state_lock(sk1);
- return;
- }
- if (sk1 < sk2) {
+ if (sk1 > sk2)
+ swap(sk1, sk2);
+ if (sk1 && sk1 != sk2)
unix_state_lock(sk1);
- unix_state_lock_nested(sk2);
- } else {
- unix_state_lock(sk2);
- unix_state_lock_nested(sk1);
- }
+ unix_state_lock(sk2);
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out_unlock;
}

- unix_state_lock_nested(sk);
+ unix_state_lock(sk);

if (sk->sk_state != st) {
unix_state_unlock(sk);
@@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)

for (i = 0; i < UNIX_HASH_SIZE; i++) {
spin_lock_init(&net->unx.table.locks[i]);
+ lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
}

diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..8ab5e2217e4c 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
* queue lock. With the other's queue locked it's
* OK to lock the state.
*/
- unix_state_lock_nested(req);
+ unix_state_lock(req);
peer = unix_sk(req)->peer;
buf[i++] = (peer ? sock_i_ino(peer) : 0);
unix_state_unlock(req);
--
2.43.0


2024-01-27 02:05:00

by Kent Overstreet

Subject: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage

Unnecessary; we're not actually taking nested locks of the same type
here, so the SINGLE_DEPTH_NESTING annotation only weakens lockdep's
checking.

Cc: [email protected]
Cc: Jens Axboe <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
---
 drivers/block/pktcdvd.c  |  8 ++++----
 fs/pipe.c                | 10 +---------
 include/linux/lockdep.h  |  3 +++
 kernel/locking/lockdep.c |  6 ++++++
4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d56d972aadb3..2eb68a624fda 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -332,7 +332,7 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
{
int n = 0;
int idx;
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);
for (idx = 0; idx < MAX_WRITERS; idx++) {
struct pktcdvd_device *pd = pkt_devs[idx];
if (!pd)
@@ -2639,7 +2639,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
struct pktcdvd_device *pd;
struct gendisk *disk;

- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);

for (idx = 0; idx < MAX_WRITERS; idx++)
if (!pkt_devs[idx])
@@ -2729,7 +2729,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
int idx;
int ret = 0;

- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);

for (idx = 0; idx < MAX_WRITERS; idx++) {
pd = pkt_devs[idx];
@@ -2780,7 +2780,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
{
struct pktcdvd_device *pd;

- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);

pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
if (pd) {
diff --git a/fs/pipe.c b/fs/pipe.c
index 50c8a8596b52..abe171566015 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -78,14 +78,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;

#define cmp_int(l, r) ((l > r) - (l < r))

-#ifdef CONFIG_PROVE_LOCKING
-static int pipe_lock_cmp_fn(const struct lockdep_map *a,
- const struct lockdep_map *b)
-{
- return cmp_int((unsigned long) a, (unsigned long) b);
-}
-#endif
-
void pipe_lock(struct pipe_inode_info *pipe)
{
if (pipe->files)
@@ -824,7 +816,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
- lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
+ lock_set_cmp_fn_ptr_order(&pipe->mutex);
return pipe;
}

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 08b0d1d9d78b..e0b121f96c80 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -391,6 +391,7 @@ extern int lockdep_is_held(const void *);
#endif /* !LOCKDEP */

#ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *, const struct lockdep_map *);
void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);

#define lock_set_cmp_fn(lock, ...) lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__)
@@ -398,6 +399,8 @@ void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
#define lock_set_cmp_fn(lock, ...) do { } while (0)
#endif

+#define lock_set_cmp_fn_ptr_order(lock) lock_set_cmp_fn(lock, lockdep_ptr_order_cmp_fn, NULL)
+
enum xhlock_context_t {
XHLOCK_HARD,
XHLOCK_SOFT,
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 151bd3de5936..5630be7f5cb2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4919,6 +4919,12 @@ struct lock_class_key __lockdep_no_validate__;
EXPORT_SYMBOL_GPL(__lockdep_no_validate__);

#ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
+{
+ return ((unsigned long) a > (unsigned long) b) - ((unsigned long) a < (unsigned long) b);
+}
+
void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn,
lock_print_fn print_fn)
{
--
2.43.0


2024-01-29 14:11:18

by Christian Brauner

Subject: Re: (subset) [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Fri, 26 Jan 2024 21:01:05 -0500, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot devise a total ordering on an unbounded number
> of elements with only a few subclasses.
>
> The replacement is to define lock ordering with a proper comparison
> function.
>
> [...]

Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc

[1/4] fs/pipe: Convert to lockdep_cmp_fn
https://git.kernel.org/vfs/vfs/c/38b25beb3b67

2024-01-29 15:22:40

by Jan Kara

Subject: Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Fri 26-01-24 21:01:05, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot devise a total ordering on an unbounded number
> of elements with only a few subclasses.
>
> The replacement is to define lock ordering with a proper comparison
> function.
>
> fs/pipe.c was already doing everything correctly otherwise; nothing
> much changes here.
>
> Cc: Alexander Viro <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Jan Kara <[email protected]>
> Signed-off-by: Kent Overstreet <[email protected]>

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara <[email protected]>

Honza


> [...]
--
Jan Kara <[email protected]>
SUSE Labs, CR