2024-01-27 02:21:50

by Kent Overstreet

[permalink] [raw]
Subject: [PATCH 0/4] lockdep cmp fn conversions

rationale:
*_lock_nested() is fundamentally broken - in order for lockdep to work
we need to be able to check that we're following some defined ordering,
and it's not possible to define a total ordering of an arbitrary number
of objects with only a small fixed size enum.

so it needs to go. A while back I added the ability to set a comparison
function for a lock class, and this is the start of hopefully a slow,
steady trickle of patches, as time allows, to convert code to use it.

Kent Overstreet (4):
fs/pipe: Convert to lockdep_cmp_fn
pktcdvd: kill mutex_lock_nested() usage
net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()
af_unix: convert to lock_cmp_fn

drivers/block/pktcdvd.c | 8 ++---
fs/pipe.c | 77 ++++++++++++++++------------------------
include/linux/lockdep.h | 3 ++
include/net/af_unix.h | 3 --
kernel/locking/lockdep.c | 6 ++++
net/core/sock.c | 1 +
net/unix/af_unix.c | 24 ++++++-------
net/unix/diag.c | 2 +-
8 files changed, 55 insertions(+), 69 deletions(-)

--
2.43.0



2024-01-27 02:22:20

by Kent Overstreet

[permalink] [raw]
Subject: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

*_lock_nested() is fundamentally broken; lockdep needs to check lock
ordering, but we cannot devise a total ordering on an unbounded number
of elements with only a few subclasses.

the replacement is to define lock ordering with a proper comparison
function.

fs/pipe.c was already doing everything correctly otherwise, nothing
much changes here.

Cc: Alexander Viro <[email protected]>
Cc: Christian Brauner <[email protected]>
Cc: Jan Kara <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
---
fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index f1adbfe743d4..50c8a8596b52 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
* -- Manfred Spraul <[email protected]> 2002-05-09
*/

-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r) ((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
{
- if (pipe->files)
- mutex_lock_nested(&pipe->mutex, subclass);
+ return cmp_int((unsigned long) a, (unsigned long) b);
}
+#endif

void pipe_lock(struct pipe_inode_info *pipe)
{
- /*
- * pipe_lock() nests non-pipe inode locks (for writing to a file)
- */
- pipe_lock_nested(pipe, I_MUTEX_PARENT);
+ if (pipe->files)
+ mutex_lock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_lock);

@@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
}
EXPORT_SYMBOL(pipe_unlock);

-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
- mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
- mutex_unlock(&pipe->mutex);
-}
-
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
BUG_ON(pipe1 == pipe2);

- if (pipe1 < pipe2) {
- pipe_lock_nested(pipe1, I_MUTEX_PARENT);
- pipe_lock_nested(pipe2, I_MUTEX_CHILD);
- } else {
- pipe_lock_nested(pipe2, I_MUTEX_PARENT);
- pipe_lock_nested(pipe1, I_MUTEX_CHILD);
- }
+ if (pipe1 > pipe2)
+ swap(pipe1, pipe2);
+
+ pipe_lock(pipe1);
+ pipe_lock(pipe2);
}

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
return 0;

ret = 0;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

/*
* We only wake up writers if the pipe was full when we started
@@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
ret = -EAGAIN;
break;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

/*
* We only get here if we didn't actually read anything.
@@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
return -ERESTARTSYS;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
wake_next_reader = true;
}
if (pipe_empty(pipe->head, pipe->tail))
wake_next_reader = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

if (was_full)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
if (unlikely(total_len == 0))
return 0;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
* after waiting we need to re-check whether the pipe
* become empty while we dropped the lock.
*/
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
if (was_empty)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
was_empty = pipe_empty(pipe->head, pipe->tail);
wake_next_writer = true;
}
out:
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
wake_next_writer = false;
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

/*
* If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)

switch (cmd) {
case FIONREAD:
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
count = 0;
head = pipe->head;
tail = pipe->tail;
@@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
count += pipe->bufs[tail & mask].len;
tail++;
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

return put_user(count, (int __user *)arg);

#ifdef CONFIG_WATCH_QUEUE
case IOC_WATCH_QUEUE_SET_SIZE: {
int ret;
- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
ret = watch_queue_set_size(pipe, arg);
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}

@@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
{
struct pipe_inode_info *pipe = file->private_data;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (file->f_mode & FMODE_READ)
pipe->readers--;
if (file->f_mode & FMODE_WRITE)
@@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

put_pipe_info(inode, pipe);
return 0;
@@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
struct pipe_inode_info *pipe = filp->private_data;
int retval = 0;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);
if (filp->f_mode & FMODE_READ)
retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
/* this can happen only if on == T */
fasync_helper(-1, filp, 0, &pipe->fasync_readers);
}
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return retval;
}

@@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
+ lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
return pipe;
}

@@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
filp->private_data = pipe;
/* OK, we have a pipe and it's pinned down */

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

/* We can only do regular read/write on fifos */
stream_open(inode, filp);
@@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
}

/* Ok! */
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return 0;

err_rd:
@@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
goto err;

err:
- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);

put_pipe_info(inode, pipe);
return ret;
@@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
if (!pipe)
return -EBADF;

- __pipe_lock(pipe);
+ mutex_lock(&pipe->mutex);

switch (cmd) {
case F_SETPIPE_SZ:
@@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
break;
}

- __pipe_unlock(pipe);
+ mutex_unlock(&pipe->mutex);
return ret;
}

--
2.43.0


2024-01-27 02:27:03

by Kent Overstreet

[permalink] [raw]
Subject: [PATCH 4/4] af_unix: convert to lock_cmp_fn

Kill
- unix_state_lock_nested
- _nested usage for net->unx.table.locks[].

replace both with lock_set_cmp_fn_ptr_order(&u->lock).

The lock ordering in sk_diag_dump_icons() looks suspicious; this may
turn up a real issue.

Cc: [email protected]
Signed-off-by: Kent Overstreet <[email protected]>
---
include/net/af_unix.h | 3 ---
net/unix/af_unix.c | 20 ++++++++------------
net/unix/diag.c | 2 +-
3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 49c4640027d8..4eff0a089640 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -48,9 +48,6 @@ struct scm_stat {

#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
- spin_lock_nested(&unix_sk(s)->lock, \
- SINGLE_DEPTH_NESTING)

/* The AF_UNIX socket */
struct unix_sock {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d013de3c5490..1a0d273799c1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
swap(hash1, hash2);

spin_lock(&net->unx.table.locks[hash1]);
- spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
+ spin_lock(&net->unx.table.locks[hash2]);
}

static void unix_table_double_unlock(struct net *net,
@@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
+ lock_set_cmp_fn_ptr_order(&u->lock);
atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
@@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
- if (unlikely(sk1 == sk2) || !sk2) {
- unix_state_lock(sk1);
- return;
- }
- if (sk1 < sk2) {
+ if (sk1 > sk2)
+ swap(sk1, sk2);
+ if (sk1 && sk1 != sk2)
unix_state_lock(sk1);
- unix_state_lock_nested(sk2);
- } else {
- unix_state_lock(sk2);
- unix_state_lock_nested(sk1);
- }
+ unix_state_lock(sk2);
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
goto out_unlock;
}

- unix_state_lock_nested(sk);
+ unix_state_lock(sk);

if (sk->sk_state != st) {
unix_state_unlock(sk);
@@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)

for (i = 0; i < UNIX_HASH_SIZE; i++) {
spin_lock_init(&net->unx.table.locks[i]);
+ lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
}

diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..8ab5e2217e4c 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
* queue lock. With the other's queue locked it's
* OK to lock the state.
*/
- unix_state_lock_nested(req);
+ unix_state_lock(req);
peer = unix_sk(req)->peer;
buf[i++] = (peer ? sock_i_ino(peer) : 0);
unix_state_unlock(req);
--
2.43.0


2024-01-27 02:31:19

by Kent Overstreet

[permalink] [raw]
Subject: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage

Unnecessary; we're not actually taking nested locks of the same type.

Cc: [email protected]
Cc: Jens Axboe <[email protected]>
Signed-off-by: Kent Overstreet <[email protected]>
---
drivers/block/pktcdvd.c | 8 ++++----
fs/pipe.c | 10 +---------
include/linux/lockdep.h | 3 +++
kernel/locking/lockdep.c | 6 ++++++
4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d56d972aadb3..2eb68a624fda 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -332,7 +332,7 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
{
int n = 0;
int idx;
- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);
for (idx = 0; idx < MAX_WRITERS; idx++) {
struct pktcdvd_device *pd = pkt_devs[idx];
if (!pd)
@@ -2639,7 +2639,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
struct pktcdvd_device *pd;
struct gendisk *disk;

- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);

for (idx = 0; idx < MAX_WRITERS; idx++)
if (!pkt_devs[idx])
@@ -2729,7 +2729,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
int idx;
int ret = 0;

- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);

for (idx = 0; idx < MAX_WRITERS; idx++) {
pd = pkt_devs[idx];
@@ -2780,7 +2780,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
{
struct pktcdvd_device *pd;

- mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+ mutex_lock(&ctl_mutex);

pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
if (pd) {
diff --git a/fs/pipe.c b/fs/pipe.c
index 50c8a8596b52..abe171566015 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -78,14 +78,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;

#define cmp_int(l, r) ((l > r) - (l < r))

-#ifdef CONFIG_PROVE_LOCKING
-static int pipe_lock_cmp_fn(const struct lockdep_map *a,
- const struct lockdep_map *b)
-{
- return cmp_int((unsigned long) a, (unsigned long) b);
-}
-#endif
-
void pipe_lock(struct pipe_inode_info *pipe)
{
if (pipe->files)
@@ -824,7 +816,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
pipe->nr_accounted = pipe_bufs;
pipe->user = user;
mutex_init(&pipe->mutex);
- lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
+ lock_set_cmp_fn_ptr_order(&pipe->mutex);
return pipe;
}

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 08b0d1d9d78b..e0b121f96c80 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -391,6 +391,7 @@ extern int lockdep_is_held(const void *);
#endif /* !LOCKDEP */

#ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *, const struct lockdep_map *);
void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);

#define lock_set_cmp_fn(lock, ...) lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__)
@@ -398,6 +399,8 @@ void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
#define lock_set_cmp_fn(lock, ...) do { } while (0)
#endif

+#define lock_set_cmp_fn_ptr_order(lock) lock_set_cmp_fn(lock, lockdep_ptr_order_cmp_fn);
+
enum xhlock_context_t {
XHLOCK_HARD,
XHLOCK_SOFT,
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 151bd3de5936..5630be7f5cb2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4919,6 +4919,12 @@ struct lock_class_key __lockdep_no_validate__;
EXPORT_SYMBOL_GPL(__lockdep_no_validate__);

#ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
+{
+ return cmp_int((unsigned long) a, (unsigned long) b);
+}
+
void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn,
lock_print_fn print_fn)
{
--
2.43.0


2024-01-27 02:31:22

by Kent Overstreet

[permalink] [raw]
Subject: [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()

Cc: [email protected]
Signed-off-by: Kent Overstreet <[email protected]>
---
net/core/sock.c | 1 +
net/unix/af_unix.c | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..da7360c0f454 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3474,6 +3474,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
spin_lock_init(&sk->sk_peer_lock);
+ lock_set_cmp_fn_ptr_order(&sk->sk_peer_lock);

sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ac1f2bc18fc9..d013de3c5490 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -706,10 +706,10 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)

if (sk < peersk) {
spin_lock(&sk->sk_peer_lock);
- spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ spin_lock(&peersk->sk_peer_lock);
} else {
spin_lock(&peersk->sk_peer_lock);
- spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ spin_lock(&sk->sk_peer_lock);
}
old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred;
--
2.43.0


2024-01-28 04:30:45

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage

Hi Kent,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/locking/core]
[also build test ERROR on net/main net-next/main linus/master v6.8-rc1 next-20240125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Kent-Overstreet/fs-pipe-Convert-to-lockdep_cmp_fn/20240127-101832
base: tip/locking/core
patch link: https://lore.kernel.org/r/20240127020833.487907-3-kent.overstreet%40linux.dev
patch subject: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
config: mips-randconfig-r063-20240127 (https://download.01.org/0day-ci/archive/20240128/[email protected]/config)
compiler: clang version 15.0.7 (https://github.com/llvm/llvm-project.git 8dfdcc7b7bf66834a761bd8de445840ef68e4d1a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240128/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

>> kernel/locking/lockdep.c:4925:9: error: call to undeclared function 'cmp_int'; ISO C99 and later do not support implicit function declarations [-Werror,-Wimplicit-function-declaration]
return cmp_int((unsigned long) a, (unsigned long) b);
^
kernel/locking/lockdep.c:4925:9: note: did you mean 'smp_init'?
include/linux/smp.h:225:20: note: 'smp_init' declared here
static inline void smp_init(void) { }
^
1 error generated.


vim +/cmp_int +4925 kernel/locking/lockdep.c

4920
4921 #ifdef CONFIG_PROVE_LOCKING
4922 int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
4923 const struct lockdep_map *b)
4924 {
> 4925 return cmp_int((unsigned long) a, (unsigned long) b);
4926 }
4927

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-01-28 06:49:39

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage

Hi Kent,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/locking/core]
[also build test ERROR on net/main net-next/main linus/master v6.8-rc1 next-20240125]
[cannot apply to horms-ipvs/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Kent-Overstreet/fs-pipe-Convert-to-lockdep_cmp_fn/20240127-101832
base: tip/locking/core
patch link: https://lore.kernel.org/r/20240127020833.487907-3-kent.overstreet%40linux.dev
patch subject: [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
config: x86_64-randconfig-161-20240127 (https://download.01.org/0day-ci/archive/20240128/[email protected]/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240128/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

kernel/locking/lockdep.c: In function 'lockdep_ptr_order_cmp_fn':
>> kernel/locking/lockdep.c:4925:9: error: implicit declaration of function 'cmp_int'; did you mean 'smp_init'? [-Werror=implicit-function-declaration]
4925 | return cmp_int((unsigned long) a, (unsigned long) b);
| ^~~~~~~
| smp_init
cc1: some warnings being treated as errors


vim +4925 kernel/locking/lockdep.c

4920
4921 #ifdef CONFIG_PROVE_LOCKING
4922 int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
4923 const struct lockdep_map *b)
4924 {
> 4925 return cmp_int((unsigned long) a, (unsigned long) b);
4926 }
4927

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-01-28 08:29:12

by Kuniyuki Iwashima

[permalink] [raw]
Subject: Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn

From: Kent Overstreet <[email protected]>
Date: Fri, 26 Jan 2024 21:08:31 -0500
> Kill
> - unix_state_lock_nested
> - _nested usage for net->unx.table.locks[].
>
> replace both with lock_set_cmp_fn_ptr_order(&u->lock).
>
> The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> turn up a real issue.

Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().

The lock order in sk_diag_dump_icons() is

listening socket -> child socket in the listener's queue

, and the inverse order never happens. ptr comparison does not make
sense in this case, and lockdep will complain about false positive.


>
> Cc: [email protected]
> Signed-off-by: Kent Overstreet <[email protected]>
> ---
> include/net/af_unix.h | 3 ---
> net/unix/af_unix.c | 20 ++++++++------------
> net/unix/diag.c | 2 +-
> 3 files changed, 9 insertions(+), 16 deletions(-)
>
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index 49c4640027d8..4eff0a089640 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -48,9 +48,6 @@ struct scm_stat {
>
> #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
> #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock)
> -#define unix_state_lock_nested(s) \
> - spin_lock_nested(&unix_sk(s)->lock, \
> - SINGLE_DEPTH_NESTING)
>
> /* The AF_UNIX socket */
> struct unix_sock {
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index d013de3c5490..1a0d273799c1 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
> swap(hash1, hash2);
>
> spin_lock(&net->unx.table.locks[hash1]);
> - spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
> + spin_lock(&net->unx.table.locks[hash2]);
> }
>
> static void unix_table_double_unlock(struct net *net,
> @@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
> u->path.dentry = NULL;
> u->path.mnt = NULL;
> spin_lock_init(&u->lock);
> + lock_set_cmp_fn_ptr_order(&u->lock);
> atomic_long_set(&u->inflight, 0);
> INIT_LIST_HEAD(&u->link);
> mutex_init(&u->iolock); /* single task reading lock */
> @@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>
> static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
> {
> - if (unlikely(sk1 == sk2) || !sk2) {
> - unix_state_lock(sk1);
> - return;
> - }
> - if (sk1 < sk2) {
> + if (sk1 > sk2)
> + swap(sk1, sk2);
> + if (sk1 && sk1 != sk2)
> unix_state_lock(sk1);
> - unix_state_lock_nested(sk2);
> - } else {
> - unix_state_lock(sk2);
> - unix_state_lock_nested(sk1);
> - }
> + unix_state_lock(sk2);
> }
>
> static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
> @@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> goto out_unlock;
> }
>
> - unix_state_lock_nested(sk);
> + unix_state_lock(sk);
>
> if (sk->sk_state != st) {
> unix_state_unlock(sk);
> @@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)
>
> for (i = 0; i < UNIX_HASH_SIZE; i++) {
> spin_lock_init(&net->unx.table.locks[i]);
> + lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
> INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
> }
>
> diff --git a/net/unix/diag.c b/net/unix/diag.c
> index bec09a3a1d44..8ab5e2217e4c 100644
> --- a/net/unix/diag.c
> +++ b/net/unix/diag.c
> @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
> * queue lock. With the other's queue locked it's
> * OK to lock the state.
> */
> - unix_state_lock_nested(req);
> + unix_state_lock(req);
> peer = unix_sk(req)->peer;
> buf[i++] = (peer ? sock_i_ino(peer) : 0);
> unix_state_unlock(req);
> --
> 2.43.0

2024-01-28 09:18:30

by Kuniyuki Iwashima

[permalink] [raw]
Subject: Re: [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()

From: Kent Overstreet <[email protected]>
Date: Fri, 26 Jan 2024 21:08:30 -0500
> Cc: [email protected]
> Signed-off-by: Kent Overstreet <[email protected]>
> ---
> net/core/sock.c | 1 +
> net/unix/af_unix.c | 4 ++--
> 2 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 158dbdebce6a..da7360c0f454 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -3474,6 +3474,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
> sk->sk_peer_pid = NULL;
> sk->sk_peer_cred = NULL;
> spin_lock_init(&sk->sk_peer_lock);
> + lock_set_cmp_fn_ptr_order(&sk->sk_peer_lock);
>
> sk->sk_write_pending = 0;
> sk->sk_rcvlowat = 1;
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index ac1f2bc18fc9..d013de3c5490 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -706,10 +706,10 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
>
> if (sk < peersk) {
> spin_lock(&sk->sk_peer_lock);
> - spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
> + spin_lock(&peersk->sk_peer_lock);
> } else {
> spin_lock(&peersk->sk_peer_lock);
> - spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
> + spin_lock(&sk->sk_peer_lock);
> }

hmm.. I think we need not hold two locks here in the first place.
Let me post patches.

Thanks!


> old_pid = sk->sk_peer_pid;
> old_cred = sk->sk_peer_cred;
> --
> 2.43.0

2024-01-28 19:38:25

by Kent Overstreet

[permalink] [raw]
Subject: Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn

On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> From: Kent Overstreet <[email protected]>
> Date: Fri, 26 Jan 2024 21:08:31 -0500
> > Kill
> > - unix_state_lock_nested
> > - _nested usage for net->unx.table.locks[].
> >
> > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> >
> > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > turn up a real issue.
>
> Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
>
> The lock order in sk_diag_dump_icons() is
>
> listening socket -> child socket in the listener's queue
>
> , and the inverse order never happens. ptr comparison does not make
> sense in this case, and lockdep will complain about false positive.

Is that a real lock ordering? Is this parent -> child relationship well
defined?

If it is, we should be able to write a lock_cmp_fn for it, as long as
it's not some handwavy "this will never happen but _nested won't check
for it" like I saw elsewhere in the net code... :)

2024-01-28 20:57:07

by Kuniyuki Iwashima

[permalink] [raw]
Subject: Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn

From: Kent Overstreet <[email protected]>
Date: Sun, 28 Jan 2024 14:38:02 -0500
> On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> > From: Kent Overstreet <[email protected]>
> > Date: Fri, 26 Jan 2024 21:08:31 -0500
> > > Kill
> > > - unix_state_lock_nested
> > > - _nested usage for net->unx.table.locks[].
> > >
> > > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > >
> > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > > turn up a real issue.
> >
> > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> >
> > The lock order in sk_diag_dump_icons() is
> >
> > listening socket -> child socket in the listener's queue
> >
> > , and the inverse order never happens. ptr comparison does not make
> > sense in this case, and lockdep will complain about false positive.
>
> Is that a real lock ordering? Is this parent -> child relationship well
> defined?
>
> If it is, we should be able to write a lock_cmp_fn for it, as long as
> it's not some handwavy "this will never happen but _nested won't check
> for it" like I saw elsewhere in the net code... :)

The problem would be there's no handy way to detect the relationship
except for iterating the queue again.

---8<---
static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
const struct lockdep_map *_b)
{
const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map);
const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map);

if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) {
/* check if b is a's child */
}

/* otherwise, ptr comparison here. */
}
---8<---


This can be resolved by a patch like this, which is in my local tree
for another series.

So, after posting the series, I can revisit this and write lock_cmp_fn
for u->lock.

---8<---
commit 12d39766b06068fda5987f4e7b4827e8c3273137
Author: Kuniyuki Iwashima <[email protected]>
Date: Thu Jan 11 22:36:58 2024 +0000

af_unix: Save listener for embryo socket.

Signed-off-by: Kuniyuki Iwashima <[email protected]>

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 9ea04653c7c9..d0c0d81bcb1d 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -82,6 +82,7 @@ struct scm_stat {
struct unix_sock {
/* WARNING: sk has to be the first member */
struct sock sk;
+#define usk_listener sk.__sk_common.skc_unix_listener
struct unix_address *addr;
struct path path;
struct mutex iolock, bindlock;
diff --git a/include/net/sock.h b/include/net/sock.h
index a9d99a9c583f..3df3d068345a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -142,6 +142,8 @@ typedef __u64 __bitwise __addrpair;
* [union with @skc_incoming_cpu]
* @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number
* [union with @skc_incoming_cpu]
+ * @skc_unix_listener: connection request listener socket for AF_UNIX
+ * [union with @skc_rxhash]
* @skc_refcnt: reference count
*
* This is the minimal network layer representation of sockets, the header
@@ -227,6 +229,7 @@ struct sock_common {
u32 skc_rxhash;
u32 skc_window_clamp;
u32 skc_tw_snd_nxt; /* struct tcp_timewait_sock */
+ struct sock *skc_unix_listener; /* struct unix_sock */
};
/* public: */
};
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5f9871555ec6..4a41bb727c32 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -991,6 +991,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
+ u->usk_listener = NULL;
u->inflight = 0;
u->path.dentry = NULL;
u->path.mnt = NULL;
@@ -1612,6 +1613,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
newsk->sk_type = sk->sk_type;
init_peercred(newsk);
newu = unix_sk(newsk);
+ newu->usk_listener = other;
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);

@@ -1707,8 +1709,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk;
- struct sock *tsk;
struct sk_buff *skb;
+ struct sock *tsk;
int err;

err = -EOPNOTSUPP;
@@ -1733,6 +1735,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
}

tsk = skb->sk;
+ unix_sk(tsk)->usk_listener = NULL;
skb_free_datagram(sk, skb);
wake_up_interruptible(&unix_sk(sk)->peer_wait);

---8<---


2024-01-29 01:34:29

by Kent Overstreet

[permalink] [raw]
Subject: Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn

On Sun, Jan 28, 2024 at 12:56:32PM -0800, Kuniyuki Iwashima wrote:
> From: Kent Overstreet <[email protected]>
> Date: Sun, 28 Jan 2024 14:38:02 -0500
> > On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> > > From: Kent Overstreet <[email protected]>
> > > Date: Fri, 26 Jan 2024 21:08:31 -0500
> > > > Kill
> > > > - unix_state_lock_nested
> > > > - _nested usage for net->unx.table.locks[].
> > > >
> > > > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > > >
> > > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > > > turn up a real issue.
> > >
> > > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> > >
> > > The lock order in sk_diag_dump_icons() is
> > >
> > > listening socket -> child socket in the listener's queue
> > >
> > > , and the inverse order never happens. ptr comparison does not make
> > > sense in this case, and lockdep will complain about false positive.
> >
> > Is that a real lock ordering? Is this parent -> child relationship well
> > defined?
> >
> > If it is, we should be able to write a lock_cmp_fn for it, as long as
> > it's not some handwavy "this will never happen but _nested won't check
> > for it" like I saw elsewhere in the net code... :)
>
> The problem would be there's no handy way to detect the relationship
> except for iterating the queue again.
>
> ---8<---
> static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
> const struct lockdep_map *_b)
> {
> const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map);
> const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map);
>
> if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) {
> /* check if b is a's child */
> }
>
> /* otherwise, ptr comparison here. */
> }
> ---8<---
>
>
> This can be resolved by a patch like this, which is in my local tree
> for another series.
>
> So, after posting the series, I can revisit this and write lock_cmp_fn
> for u->lock.

Sounds good! Please CC me when you do.

2024-02-02 12:04:17

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot devise a total ordering on an unbounded number
> of elements with only a few subclasses.
>
> the replacement is to define lock ordering with a proper comparison
> function.
>
> fs/pipe.c was already doing everything correctly otherwise, nothing
> much changes here.
>
> Cc: Alexander Viro <[email protected]>
> Cc: Christian Brauner <[email protected]>
> Cc: Jan Kara <[email protected]>
> Signed-off-by: Kent Overstreet <[email protected]>

I had to digest for a while what this new lockdep lock ordering feature is
about. I have one pending question - what is the motivation of this
conversion of pipe code? AFAIU we don't have any problems with lockdep
annotations on pipe->mutex because there are always only two subclasses?

Honza

> ---
> fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
> 1 file changed, 36 insertions(+), 45 deletions(-)
>
> diff --git a/fs/pipe.c b/fs/pipe.c
> index f1adbfe743d4..50c8a8596b52 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
> * -- Manfred Spraul <[email protected]> 2002-05-09
> */
>
> -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> +#define cmp_int(l, r) ((l > r) - (l < r))
> +
> +#ifdef CONFIG_PROVE_LOCKING
> +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> + const struct lockdep_map *b)
> {
> - if (pipe->files)
> - mutex_lock_nested(&pipe->mutex, subclass);
> + return cmp_int((unsigned long) a, (unsigned long) b);
> }
> +#endif
>
> void pipe_lock(struct pipe_inode_info *pipe)
> {
> - /*
> - * pipe_lock() nests non-pipe inode locks (for writing to a file)
> - */
> - pipe_lock_nested(pipe, I_MUTEX_PARENT);
> + if (pipe->files)
> + mutex_lock(&pipe->mutex);
> }
> EXPORT_SYMBOL(pipe_lock);
>
> @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> }
> EXPORT_SYMBOL(pipe_unlock);
>
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> - mutex_unlock(&pipe->mutex);
> -}
> -
> void pipe_double_lock(struct pipe_inode_info *pipe1,
> struct pipe_inode_info *pipe2)
> {
> BUG_ON(pipe1 == pipe2);
>
> - if (pipe1 < pipe2) {
> - pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> - pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> - } else {
> - pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> - pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> - }
> + if (pipe1 > pipe2)
> + swap(pipe1, pipe2);
> +
> + pipe_lock(pipe1);
> + pipe_lock(pipe2);
> }
>
> static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> return 0;
>
> ret = 0;
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> /*
> * We only wake up writers if the pipe was full when we started
> @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> ret = -EAGAIN;
> break;
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> /*
> * We only get here if we didn't actually read anything.
> @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
> return -ERESTARTSYS;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> wake_next_reader = true;
> }
> if (pipe_empty(pipe->head, pipe->tail))
> wake_next_reader = false;
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> if (was_full)
> wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> if (unlikely(total_len == 0))
> return 0;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> if (!pipe->readers) {
> send_sig(SIGPIPE, current, 0);
> @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> * after waiting we need to re-check whether the pipe
> * become empty while we dropped the lock.
> */
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> if (was_empty)
> wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> was_empty = pipe_empty(pipe->head, pipe->tail);
> wake_next_writer = true;
> }
> out:
> if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
> wake_next_writer = false;
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> /*
> * If we do do a wakeup event, we do a 'sync' wakeup, because we
> @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>
> switch (cmd) {
> case FIONREAD:
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> count = 0;
> head = pipe->head;
> tail = pipe->tail;
> @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> count += pipe->bufs[tail & mask].len;
> tail++;
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> return put_user(count, (int __user *)arg);
>
> #ifdef CONFIG_WATCH_QUEUE
> case IOC_WATCH_QUEUE_SET_SIZE: {
> int ret;
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> ret = watch_queue_set_size(pipe, arg);
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return ret;
> }
>
> @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
> {
> struct pipe_inode_info *pipe = file->private_data;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> if (file->f_mode & FMODE_READ)
> pipe->readers--;
> if (file->f_mode & FMODE_WRITE)
> @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
> kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> put_pipe_info(inode, pipe);
> return 0;
> @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> struct pipe_inode_info *pipe = filp->private_data;
> int retval = 0;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
> if (filp->f_mode & FMODE_READ)
> retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
> if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> /* this can happen only if on == T */
> fasync_helper(-1, filp, 0, &pipe->fasync_readers);
> }
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return retval;
> }
>
> @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
> pipe->nr_accounted = pipe_bufs;
> pipe->user = user;
> mutex_init(&pipe->mutex);
> + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
> return pipe;
> }
>
> @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> filp->private_data = pipe;
> /* OK, we have a pipe and it's pinned down */
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> /* We can only do regular read/write on fifos */
> stream_open(inode, filp);
> @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> }
>
> /* Ok! */
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return 0;
>
> err_rd:
> @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> goto err;
>
> err:
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
>
> put_pipe_info(inode, pipe);
> return ret;
> @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> if (!pipe)
> return -EBADF;
>
> - __pipe_lock(pipe);
> + mutex_lock(&pipe->mutex);
>
> switch (cmd) {
> case F_SETPIPE_SZ:
> @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> break;
> }
>
> - __pipe_unlock(pipe);
> + mutex_unlock(&pipe->mutex);
> return ret;
> }
>
> --
> 2.43.0
>
--
Jan Kara <[email protected]>
SUSE Labs, CR

2024-02-02 12:26:21

by Sedat Dilek

[permalink] [raw]
Subject: Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <[email protected]> wrote:
>
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot devise a total ordering on an unbounded number
> > of elements with only a few subclasses.
> >
> > the replacement is to define lock ordering with a proper comparison
> > function.
> >
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> >
> > Cc: Alexander Viro <[email protected]>
> > Cc: Christian Brauner <[email protected]>
> > Cc: Jan Kara <[email protected]>
> > Signed-off-by: Kent Overstreet <[email protected]>
>
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?
>
> Honza

Hi,

"Numbers talk - Bullshit walks." (Linus Torvalds)

In things of pipes - I normally benchmark like this (example):

root# cat /dev/sdc | pipebench > /dev/null

Do you have numbers for your patch-series?

Thanks.

BG,
-Sedat-

[1] https://packages.debian.org/pipebench

>
> > ---
> > fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
> > 1 file changed, 36 insertions(+), 45 deletions(-)
> >
> > diff --git a/fs/pipe.c b/fs/pipe.c
> > index f1adbfe743d4..50c8a8596b52 100644
> > --- a/fs/pipe.c
> > +++ b/fs/pipe.c
> > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
> > * -- Manfred Spraul <[email protected]> 2002-05-09
> > */
> >
> > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> > +#define cmp_int(l, r) ((l > r) - (l < r))
> > +
> > +#ifdef CONFIG_PROVE_LOCKING
> > +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> > + const struct lockdep_map *b)
> > {
> > - if (pipe->files)
> > - mutex_lock_nested(&pipe->mutex, subclass);
> > + return cmp_int((unsigned long) a, (unsigned long) b);
> > }
> > +#endif
> >
> > void pipe_lock(struct pipe_inode_info *pipe)
> > {
> > - /*
> > - * pipe_lock() nests non-pipe inode locks (for writing to a file)
> > - */
> > - pipe_lock_nested(pipe, I_MUTEX_PARENT);
> > + if (pipe->files)
> > + mutex_lock(&pipe->mutex);
> > }
> > EXPORT_SYMBOL(pipe_lock);
> >
> > @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
> > }
> > EXPORT_SYMBOL(pipe_unlock);
> >
> > -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> > -{
> > - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> > -}
> > -
> > -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> > -{
> > - mutex_unlock(&pipe->mutex);
> > -}
> > -
> > void pipe_double_lock(struct pipe_inode_info *pipe1,
> > struct pipe_inode_info *pipe2)
> > {
> > BUG_ON(pipe1 == pipe2);
> >
> > - if (pipe1 < pipe2) {
> > - pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> > - pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> > - } else {
> > - pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> > - pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> > - }
> > + if (pipe1 > pipe2)
> > + swap(pipe1, pipe2);
> > +
> > + pipe_lock(pipe1);
> > + pipe_lock(pipe2);
> > }
> >
> > static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> > return 0;
> >
> > ret = 0;
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > /*
> > * We only wake up writers if the pipe was full when we started
> > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> > ret = -EAGAIN;
> > break;
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > /*
> > * We only get here if we didn't actually read anything.
> > @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> > if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
> > return -ERESTARTSYS;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
> > wake_next_reader = true;
> > }
> > if (pipe_empty(pipe->head, pipe->tail))
> > wake_next_reader = false;
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > if (was_full)
> > wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> > if (unlikely(total_len == 0))
> > return 0;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > if (!pipe->readers) {
> > send_sig(SIGPIPE, current, 0);
> > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> > * after waiting we need to re-check whether the pipe
> > * become empty while we dropped the lock.
> > */
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > if (was_empty)
> > wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
> > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> > wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > was_empty = pipe_empty(pipe->head, pipe->tail);
> > wake_next_writer = true;
> > }
> > out:
> > if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
> > wake_next_writer = false;
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > /*
> > * If we do do a wakeup event, we do a 'sync' wakeup, because we
> > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >
> > switch (cmd) {
> > case FIONREAD:
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > count = 0;
> > head = pipe->head;
> > tail = pipe->tail;
> > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> > count += pipe->bufs[tail & mask].len;
> > tail++;
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > return put_user(count, (int __user *)arg);
> >
> > #ifdef CONFIG_WATCH_QUEUE
> > case IOC_WATCH_QUEUE_SET_SIZE: {
> > int ret;
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > ret = watch_queue_set_size(pipe, arg);
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return ret;
> > }
> >
> > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
> > {
> > struct pipe_inode_info *pipe = file->private_data;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > if (file->f_mode & FMODE_READ)
> > pipe->readers--;
> > if (file->f_mode & FMODE_WRITE)
> > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
> > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
> > kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > put_pipe_info(inode, pipe);
> > return 0;
> > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> > struct pipe_inode_info *pipe = filp->private_data;
> > int retval = 0;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> > if (filp->f_mode & FMODE_READ)
> > retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
> > if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
> > /* this can happen only if on == T */
> > fasync_helper(-1, filp, 0, &pipe->fasync_readers);
> > }
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return retval;
> > }
> >
> > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
> > pipe->nr_accounted = pipe_bufs;
> > pipe->user = user;
> > mutex_init(&pipe->mutex);
> > + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
> > return pipe;
> > }
> >
> > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> > filp->private_data = pipe;
> > /* OK, we have a pipe and it's pinned down */
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > /* We can only do regular read/write on fifos */
> > stream_open(inode, filp);
> > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> > }
> >
> > /* Ok! */
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return 0;
> >
> > err_rd:
> > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
> > goto err;
> >
> > err:
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> >
> > put_pipe_info(inode, pipe);
> > return ret;
> > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> > if (!pipe)
> > return -EBADF;
> >
> > - __pipe_lock(pipe);
> > + mutex_lock(&pipe->mutex);
> >
> > switch (cmd) {
> > case F_SETPIPE_SZ:
> > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
> > break;
> > }
> >
> > - __pipe_unlock(pipe);
> > + mutex_unlock(&pipe->mutex);
> > return ret;
> > }
> >
> > --
> > 2.43.0
> >
> --
> Jan Kara <[email protected]>
> SUSE Labs, CR
>

2024-02-02 12:48:09

by Kent Overstreet

[permalink] [raw]
Subject: Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > ordering, but we cannot device a total ordering on an unbounded number
> > of elements with only a few subclasses.
> >
> > the replacement is to define lock ordering with a proper comparison
> > function.
> >
> > fs/pipe.c was already doing everything correctly otherwise, nothing
> > much changes here.
> >
> > Cc: Alexander Viro <[email protected]>
> > Cc: Christian Brauner <[email protected]>
> > Cc: Jan Kara <[email protected]>
> > Signed-off-by: Kent Overstreet <[email protected]>
>
> I had to digest for a while what this new lockdep lock ordering feature is
> about. I have one pending question - what is the motivation of this
> conversion of pipe code? AFAIU we don't have any problems with lockdep
> annotations on pipe->mutex because there are always only two subclasses?

It's one of the easier conversions to do, and ideally /all/ users of
subclasses would go away.

Start with the easier ones, figure out those patterns, then the
harder...

2024-02-05 09:54:47

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <[email protected]> wrote:
> >
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot devise a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > >
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > >
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > >
> > > Cc: Alexander Viro <[email protected]>
> > > Cc: Christian Brauner <[email protected]>
> > > Cc: Jan Kara <[email protected]>
> > > Signed-off-by: Kent Overstreet <[email protected]>
> >
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
> >
> > Honza
>
> Hi,
>
> "Numbers talk - Bullshit walks." (Linus Torvalds)
>
> In things of pipes - I normally benchmark like this (example):
>
> root# cat /dev/sdc | pipebench > /dev/null
>
> Do you have numbers for your patch-series?

Sedat AFAIU this patch is not about performance at all but rather about
lockdep instrumentation... But maybe I'm missing your point?

Honza

--
Jan Kara <[email protected]>
SUSE Labs, CR

2024-02-05 10:00:48

by Sedat Dilek

[permalink] [raw]
Subject: Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Mon, Feb 5, 2024 at 10:54 AM Jan Kara <[email protected]> wrote:
>
> On Fri 02-02-24 13:25:20, Sedat Dilek wrote:
> > On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <[email protected]> wrote:
> > >
> > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > > ordering, but we cannot devise a total ordering on an unbounded number
> > > > of elements with only a few subclasses.
> > > >
> > > > the replacement is to define lock ordering with a proper comparison
> > > > function.
> > > >
> > > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > > much changes here.
> > > >
> > > > Cc: Alexander Viro <[email protected]>
> > > > Cc: Christian Brauner <[email protected]>
> > > > Cc: Jan Kara <[email protected]>
> > > > Signed-off-by: Kent Overstreet <[email protected]>
> > >
> > > I had to digest for a while what this new lockdep lock ordering feature is
> > > about. I have one pending question - what is the motivation of this
> > > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > > annotations on pipe->mutex because there are always only two subclasses?
> > >
> > > Honza
> >
> > Hi,
> >
> > "Numbers talk - Bullshit walks." (Linus Torvalds)
> >
> > In things of pipes - I normally benchmark like this (example):
> >
> > root# cat /dev/sdc | pipebench > /dev/null
> >
> > Do you have numbers for your patch-series?
>
> Sedat AFAIU this patch is not about performance at all but rather about
> lockdep instrumentation... But maybe I'm missing your point?
>

Sorry, I missed the point, Jan.

-Sedat-

2024-02-05 10:17:32

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn

On Fri 02-02-24 07:47:50, Kent Overstreet wrote:
> On Fri, Feb 02, 2024 at 01:03:57PM +0100, Jan Kara wrote:
> > On Fri 26-01-24 21:08:28, Kent Overstreet wrote:
> > > *_lock_nested() is fundamentally broken; lockdep needs to check lock
> > > ordering, but we cannot devise a total ordering on an unbounded number
> > > of elements with only a few subclasses.
> > >
> > > the replacement is to define lock ordering with a proper comparison
> > > function.
> > >
> > > fs/pipe.c was already doing everything correctly otherwise, nothing
> > > much changes here.
> > >
> > > Cc: Alexander Viro <[email protected]>
> > > Cc: Christian Brauner <[email protected]>
> > > Cc: Jan Kara <[email protected]>
> > > Signed-off-by: Kent Overstreet <[email protected]>
> >
> > I had to digest for a while what this new lockdep lock ordering feature is
> > about. I have one pending question - what is the motivation of this
> > conversion of pipe code? AFAIU we don't have any problems with lockdep
> > annotations on pipe->mutex because there are always only two subclasses?
>
> It's one of the easier conversions to do, and ideally /all/ users of
> subclasses would go away.
>
> Start with the easier ones, figure out those patterns, then the
> harder...

I see, thanks for explanation. So in the pipes case I actually like that
the patch makes the locking less obfuscated with lockdep details (to which
I'm mostly used to but still ;)). So feel free to add:

Reviewed-by: Jan Kara <[email protected]>

for this patch. I'm not 100% convinced it will be always possible to
replace subclasses with the new ordering mechanism but I guess time will
show.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR