2024-01-19 10:27:31

by shaozhengchao

[permalink] [raw]
Subject: [PATCH v3] ipc/mqueue: fix potential sleeping issue in mqueue_flush_file

I analyzed a potential sleep-in-atomic-context issue in the following call sequence:
Thread A                                Thread B
...                                     netlink_create  //ref = 1
do_mq_notify                            ...
  sock = netlink_getsockbyfilp          ...  //ref = 2
  info->notify_sock = sock;             ...
...                                     netlink_sendmsg
...                                       skb = netlink_alloc_large_skb  //skb->head is vmalloced
...                                       netlink_unicast
...                                         sk = netlink_getsockbyportid  //ref = 3
...                                         netlink_sendskb
...                                           __netlink_sendskb
...                                             skb_queue_tail  //put skb to sk_receive_queue
...                                         sock_put  //ref = 2
...                                     ...
...                                     netlink_release
...                                       deferred_put_nlk_sk  //ref = 1
mqueue_flush_file
  spin_lock
  remove_notification
    netlink_sendskb
      sock_put  //ref = 0
        sk_free
          ...
          __sk_destruct
            netlink_sock_destruct
              skb_queue_purge  //get skb from sk_receive_queue
                ...
                __skb_queue_purge_reason
                  kfree_skb_reason
                    __kfree_skb
                      ...
                      skb_release_all
                        skb_release_head_state
                          netlink_skb_destructor
                            vfree(skb->head)  //sleeping while holding spinlock

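In netlink_sendmsg, the memory pointed to by skb->head may be allocated by
vmalloc, and the skb may be queued on sk_receive_queue without being freed.
If mqueue_flush_file then drops the last reference to the socket while
holding the spinlock, the queued skb is purged and vfree() is called under
the lock, which may sleep. Fix this by putting the sock after releasing the
spinlock.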

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Zhengchao Shao <[email protected]>
---
v3: Put sock after releasing the spinlock.
v2: CCed some networking maintainer & netdev list
---
ipc/mqueue.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 5eea4dc0509e..4832343b7049 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -664,12 +664,23 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
static int mqueue_flush_file(struct file *filp, fl_owner_t id)
{
struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
+ struct sock *sk = NULL;

spin_lock(&info->lock);
- if (task_tgid(current) == info->notify_owner)
- remove_notification(info);
+ if (task_tgid(current) == info->notify_owner) {
+ if (info->notify_owner != NULL &&
+ info->notify.sigev_notify == SIGEV_THREAD) {
+ sk = info->notify_sock;
+ sock_hold(sk);
+ }

+ remove_notification(info);
+ }
spin_unlock(&info->lock);
+
+ if (sk)
+ sock_put(sk);
+
return 0;
}

--
2.34.1



2024-01-19 13:09:29

by Eric Dumazet

[permalink] [raw]
Subject: Re: [PATCH v3] ipc/mqueue: fix potential sleeping issue in mqueue_flush_file

On Fri, Jan 19, 2024 at 11:27 AM Zhengchao Shao
<[email protected]> wrote:
>
> I analyze the potential sleeping issue of the following processes:
> Thread A Thread B
> ... netlink_create //ref = 1
> do_mq_notify ...
> sock = netlink_getsockbyfilp ... //ref = 2
> info->notify_sock = sock; ...
> ... netlink_sendmsg
> ... skb = netlink_alloc_large_skb //skb->head is vmalloced
> ... netlink_unicast
> ... sk = netlink_getsockbyportid //ref = 3
> ... netlink_sendskb
> ... __netlink_sendskb
> ... skb_queue_tail //put skb to sk_receive_queue
> ... sock_put //ref = 2
> ... ...
> ... netlink_release
> ... deferred_put_nlk_sk //ref = 1
> mqueue_flush_file
> spin_lock
> remove_notification
> netlink_sendskb
> sock_put //ref = 0
> sk_free
> ...
> __sk_destruct
> netlink_sock_destruct
> skb_queue_purge //get skb from sk_receive_queue
> ...
> __skb_queue_purge_reason
> kfree_skb_reason
> __kfree_skb
> ...
> skb_release_all
> skb_release_head_state
> netlink_skb_destructor
> vfree(skb->head) //sleeping while holding spinlock
>
> In netlink_sendmsg, if the memory pointed to by skb->head is allocated by
> vmalloc, and is put to sk_receive_queue queue, also the skb is not freed.
> When the mqueue executes flush, the sleeping bug will occur. Put sock
> after releasing the spinlock.
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")

I think netlink started to use vmalloc() from commit c05cdb1b864f
("netlink: allow large data transfers from user-space")

> Signed-off-by: Zhengchao Shao <[email protected]>
> ---
> v3: Put sock after releasing the spinlock.
> v2: CCed some networking maintainer & netdev list
> ---
> ipc/mqueue.c | 15 +++++++++++++--
> 1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/ipc/mqueue.c b/ipc/mqueue.c
> index 5eea4dc0509e..4832343b7049 100644
> --- a/ipc/mqueue.c
> +++ b/ipc/mqueue.c
> @@ -664,12 +664,23 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
> static int mqueue_flush_file(struct file *filp, fl_owner_t id)
> {
> struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
> + struct sock *sk = NULL;
>
> spin_lock(&info->lock);
> - if (task_tgid(current) == info->notify_owner)
> - remove_notification(info);
> + if (task_tgid(current) == info->notify_owner) {
> + if (info->notify_owner != NULL &&
> + info->notify.sigev_notify == SIGEV_THREAD) {
> + sk = info->notify_sock;
> + sock_hold(sk);
> + }
>
> + remove_notification(info);
> + }
> spin_unlock(&info->lock);
> +
> + if (sk)
> + sock_put(sk);
> +
> return 0;
> }
>


Note that we could instead call vfree_atomic() from netlink_skb_destructor()

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4ed8ffd58ff375f3fa9f262e6f3b4d1a1aaf2731..9c962347cf859f16fc76e4d8a2fd22cdb3d142d6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
if (is_vmalloc_addr(skb->head)) {
if (!skb->cloned ||
!atomic_dec_return(&(skb_shinfo(skb)->dataref)))
- vfree(skb->head);
+ vfree_atomic(skb->head);

skb->head = NULL;
}

These big skbs are quite rare IMO, and we could also attempt high-order
allocations in netlink_alloc_large_skb(), using kvmalloc() instead of
vmalloc() (next week, when net-next opens).

2024-01-20 06:55:15

by shaozhengchao

[permalink] [raw]
Subject: Re: [PATCH v3] ipc/mqueue: fix potential sleeping issue in mqueue_flush_file



On 2024/1/19 21:09, Eric Dumazet wrote:
> On Fri, Jan 19, 2024 at 11:27 AM Zhengchao Shao
> <[email protected]> wrote:
>>
>> I analyze the potential sleeping issue of the following processes:
>> Thread A Thread B
>> ... netlink_create //ref = 1
>> do_mq_notify ...
>> sock = netlink_getsockbyfilp ... //ref = 2
>> info->notify_sock = sock; ...
>> ... netlink_sendmsg
>> ... skb = netlink_alloc_large_skb //skb->head is vmalloced
>> ... netlink_unicast
>> ... sk = netlink_getsockbyportid //ref = 3
>> ... netlink_sendskb
>> ... __netlink_sendskb
>> ... skb_queue_tail //put skb to sk_receive_queue
>> ... sock_put //ref = 2
>> ... ...
>> ... netlink_release
>> ... deferred_put_nlk_sk //ref = 1
>> mqueue_flush_file
>> spin_lock
>> remove_notification
>> netlink_sendskb
>> sock_put //ref = 0
>> sk_free
>> ...
>> __sk_destruct
>> netlink_sock_destruct
>> skb_queue_purge //get skb from sk_receive_queue
>> ...
>> __skb_queue_purge_reason
>> kfree_skb_reason
>> __kfree_skb
>> ...
>> skb_release_all
>> skb_release_head_state
>> netlink_skb_destructor
>> vfree(skb->head) //sleeping while holding spinlock
>>
>> In netlink_sendmsg, if the memory pointed to by skb->head is allocated by
>> vmalloc, and is put to sk_receive_queue queue, also the skb is not freed.
>> When the mqueue executes flush, the sleeping bug will occur. Put sock
>> after releasing the spinlock.
>>
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
>
Hi Eric:
> I think netlink started to use vmalloc() from commit c05cdb1b864f
> ("netlink: allow large data transfers from user-space")
>
Thank you for your review. Yes, you are right. Sorry for my mistake.
>> Signed-off-by: Zhengchao Shao <[email protected]>
>> ---
>> v3: Put sock after releasing the spinlock.
>> v2: CCed some networking maintainer & netdev list
>> ---
>> ipc/mqueue.c | 15 +++++++++++++--
>> 1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/ipc/mqueue.c b/ipc/mqueue.c
>> index 5eea4dc0509e..4832343b7049 100644
>> --- a/ipc/mqueue.c
>> +++ b/ipc/mqueue.c
>> @@ -664,12 +664,23 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
>> static int mqueue_flush_file(struct file *filp, fl_owner_t id)
>> {
>> struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
>> + struct sock *sk = NULL;
>>
>> spin_lock(&info->lock);
>> - if (task_tgid(current) == info->notify_owner)
>> - remove_notification(info);
>> + if (task_tgid(current) == info->notify_owner) {
>> + if (info->notify_owner != NULL &&
>> + info->notify.sigev_notify == SIGEV_THREAD) {
>> + sk = info->notify_sock;
>> + sock_hold(sk);
>> + }
>>
>> + remove_notification(info);
>> + }
>> spin_unlock(&info->lock);
>> +
>> + if (sk)
>> + sock_put(sk);
>> +
>> return 0;
>> }
>>
>
>
> Note that we could instead call vfree_atomic() from netlink_skb_destructor()
>
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 4ed8ffd58ff375f3fa9f262e6f3b4d1a1aaf2731..9c962347cf859f16fc76e4d8a2fd22cdb3d142d6
> 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
> if (is_vmalloc_addr(skb->head)) {
> if (!skb->cloned ||
> !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
> - vfree(skb->head);
> + vfree_atomic(skb->head);
>
> skb->head = NULL;
> }
>
OK, I will send v4 after verification.
> These big skbs are quite rare IMO, and we also could attempt
> high-order allocations
> in netlink_alloc_large_skb(), using kvmalloc() instead of vmalloc()
> (next week when net-next opens)
>
It looks good to me. I would like to do it if you want...
Thank you.

Zhengchao Shao